[PATCH net-next V2 01/10] net/mlx5: Store counters in rbtree instead of list

2016-07-14 Thread Saeed Mahameed
From: Amir Vadai <a...@vadai.me>

In order to use bulk counters, we need to have counters sorted by id.

Signed-off-by: Amir Vadai <a...@vadai.me>
Reviewed-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  1 +
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 64 ++
 include/linux/mlx5/driver.h|  2 +-
 3 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index d7ba91a..9cffb6a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -111,6 +111,7 @@ struct mlx5_fc_cache {
 };
 
 struct mlx5_fc {
+   struct rb_node node;
struct list_head list;
 
/* last{packets,bytes} members are used when calculating the delta since
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 164dc37..aaf8fd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -32,6 +32,7 @@
 
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
 #include "mlx5_core.h"
 #include "fs_core.h"
 #include "fs_cmd.h"
@@ -68,6 +69,27 @@
  *   elapsed, the thread will actually query the hardware.
  */
 
+static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
+{
+   struct rb_node **new = &root->rb_node;
+   struct rb_node *parent = NULL;
+
+   while (*new) {
+   struct mlx5_fc *this = container_of(*new, struct mlx5_fc, node);
+   int result = counter->id - this->id;
+
+   parent = *new;
+   if (result < 0)
+   new = &((*new)->rb_left);
+   else
+   new = &((*new)->rb_right);
+   }
+
+   /* Add new node and rebalance tree. */
+   rb_link_node(&counter->node, parent, new);
+   rb_insert_color(&counter->node, root);
+}
+
 static void mlx5_fc_stats_work(struct work_struct *work)
 {
struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
@@ -75,25 +97,35 @@ static void mlx5_fc_stats_work(struct work_struct *work)
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
unsigned long now = jiffies;
struct mlx5_fc *counter;
-   struct mlx5_fc *tmp;
+   struct rb_node *node;
+   LIST_HEAD(tmplist);
int err = 0;
 
spin_lock(&fc_stats->addlist_lock);

-   list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
+   list_splice_tail_init(&fc_stats->addlist, &tmplist);

-   if (!list_empty(&fc_stats->list))
+   if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
	queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD);

	spin_unlock(&fc_stats->addlist_lock);

-   list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) {
-   struct mlx5_fc_cache *c = &counter->cache;
+   list_for_each_entry(counter, &tmplist, list)
+   mlx5_fc_stats_insert(&fc_stats->counters, counter);
+
+   node = rb_first(&fc_stats->counters);
+   while (node) {
+   struct mlx5_fc_cache *c;
u64 packets;
u64 bytes;
 
+   counter = rb_entry(node, struct mlx5_fc, node);
+   c = &counter->cache;
+
+   node = rb_next(node);
+
if (counter->deleted) {
-   list_del(&counter->list);
+   rb_erase(&counter->node, &fc_stats->counters);
 
mlx5_cmd_fc_free(dev, counter->id);
 
@@ -176,7 +208,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;

-   INIT_LIST_HEAD(&fc_stats->list);
+   fc_stats->counters = RB_ROOT;
	INIT_LIST_HEAD(&fc_stats->addlist);
	spin_lock_init(&fc_stats->addlist_lock);
 
@@ -194,20 +226,32 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
struct mlx5_fc *counter;
struct mlx5_fc *tmp;
+   struct rb_node *node;
 
cancel_delayed_work_sync(&dev->priv.fc_stats.work);
destroy_workqueue(dev->priv.fc_stats.wq);
dev->priv.fc_stats.wq = NULL;
 
-   list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
-
-   list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) {
+   list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
	list_del(&counter->list);
 
mlx5_cmd_fc_free(dev, counter->id);
 
kfree(counter);
}
+
+   node = rb_first(&fc_stats->counters);
+   while (node) {
+   counter = rb_entry(node,

[PATCH net-next V2 02/10] net/mlx5: Introduce bulk reading of flow counters

2016-07-14 Thread Saeed Mahameed
From: Amir Vadai <a...@vadai.me>

This commit utilizes the ability of ConnectX-4 to bulk read flow counters.
A few bulk counter queries can be done instead of issuing thousands of
firmware commands per second to get the statistics of all the flows set
into HW, such as those programmed when we offload tc filters.

Counters are stored sorted by hardware id, and queried in blocks (id +
number of counters).

Due to a hardware requirement, the start of a block and the number of
counters in a block must be four-aligned.
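
For illustration only (a stand-alone sketch, not part of the patch; the helper
names and the rounding scheme are assumptions about how a four-aligned block can
be picked), this is how a query block covering counter ids [first_id, last_id]
could be derived:

#include <stdio.h>

/* assumed helpers: round the block start down and the block length up to a
 * multiple of 4, so the block stays four-aligned and still covers the range
 */
static unsigned int bulk_start(unsigned int first_id)
{
    return first_id & ~3u;
}

static unsigned int bulk_len(unsigned int first_id, unsigned int last_id)
{
    return ((last_id - bulk_start(first_id)) | 3u) + 1;
}

int main(void)
{
    /* ids 6..13 -> query block starting at id 4, 12 counters long (4..15) */
    printf("start %u len %u\n", bulk_start(6), bulk_len(6, 13));
    return 0;
}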

Reviewed-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Amir Vadai <a...@vadai.me>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   | 67 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   | 12 
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 83 --
 include/linux/mlx5/mlx5_ifc.h  |  8 ++-
 4 files changed, 146 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index a5bb6b6..9134010 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -413,3 +413,70 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
 
return 0;
 }
+
+struct mlx5_cmd_fc_bulk {
+   u16 id;
+   int num;
+   int outlen;
+   u32 out[0];
+};
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
+{
+   struct mlx5_cmd_fc_bulk *b;
+   int outlen = sizeof(*b) +
+   MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+   MLX5_ST_SZ_BYTES(traffic_counter) * num;
+
+   b = kzalloc(outlen, GFP_KERNEL);
+   if (!b)
+   return NULL;
+
+   b->id = id;
+   b->num = num;
+   b->outlen = outlen;
+
+   return b;
+}
+
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
+{
+   kfree(b);
+}
+
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
+{
+   u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
+
+   memset(in, 0, sizeof(in));
+
+   MLX5_SET(query_flow_counter_in, in, opcode,
+MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+   MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+   MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
+   MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
+
+   return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
+ b->out, b->outlen);
+}
+
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_fc_bulk *b, u16 id,
+ u64 *packets, u64 *bytes)
+{
+   int index = id - b->id;
+   void *stats;
+
+   if (index < 0 || index >= b->num) {
+   mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n",
+  id, b->id, b->id + b->num - 1);
+   return;
+   }
+
+   stats = MLX5_ADDR_OF(query_flow_counter_out, b->out,
+flow_statistics[index]);
+   *packets = MLX5_GET64(traffic_counter, stats, packets);
+   *bytes = MLX5_GET64(traffic_counter, stats, octets);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index fc4f7b8..158844c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -76,4 +76,16 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id);
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
  u64 *packets, u64 *bytes);
+
+struct mlx5_cmd_fc_bulk;
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num);
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b);
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b);
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_fc_bulk *b, u16 id,
+ u64 *packets, u64 *bytes);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index aaf8fd1..c2877e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -90,16 +90,66 @@ static void mlx5_fc_stats_insert(struct rb_root *root, 
struct mlx5_fc *counter)
rb_insert_color(&counter->node, root);
 }
 
+static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
+  struct mlx5_fc *first,
+  u16 last_id)
+{
+   struct mlx5_cmd_fc_bu

[PATCH net-next V2 05/10] net/mlx5: E-Switch, Add API to configure rules for the offloaded mode

2016-07-14 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

This allows upper levels in the driver, e.g. the TC offload code, to add
e-switch offloaded steering rules. The caller provides the rule spec for
matching, the action, and the source and destination vports.
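
As a rough usage sketch (illustration only, based on the signature added below;
the match spec setup and error handling are deliberately minimal and
example_offload_fwd is a made-up name), a caller forwarding traffic from one
vport to another might do:

#include <linux/slab.h>
#include <linux/err.h>
/* driver-internal headers (eswitch.h, linux/mlx5/fs.h) are assumed included */

static struct mlx5_flow_rule *
example_offload_fwd(struct mlx5_eswitch *esw, u32 src_vport, u32 dst_vport)
{
    struct mlx5_flow_spec *spec;
    struct mlx5_flow_rule *rule;

    spec = kzalloc(sizeof(*spec), GFP_KERNEL);
    if (!spec)
        return ERR_PTR(-ENOMEM);

    /* ... fill spec->match_criteria / spec->match_value as needed ... */

    rule = mlx5_eswitch_add_offloaded_rule(esw, spec,
                                           MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
                                           src_vport, dst_vport);
    kfree(spec);
    return rule;
}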

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  6 +++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 43 ++
 2 files changed, 49 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 035e536..c0b0560 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -222,6 +222,12 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 int vport,
 struct ifla_vf_stats *vf_stats);
 
+struct mlx5_flow_spec;
+
+struct mlx5_flow_rule *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+   struct mlx5_flow_spec *spec,
+   u32 action, u32 src_vport, u32 dst_vport);
 struct mlx5_flow_rule *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 
tirn);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 27122c0..a357e8e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -43,6 +43,49 @@ enum {
FDB_SLOW_PATH
 };
 
+struct mlx5_flow_rule *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+   struct mlx5_flow_spec *spec,
+   u32 action, u32 src_vport, u32 dst_vport)
+{
+   struct mlx5_flow_destination dest = { 0 };
+   struct mlx5_fc *counter = NULL;
+   struct mlx5_flow_rule *rule;
+   void *misc;
+
+   if (esw->mode != SRIOV_OFFLOADS)
+   return ERR_PTR(-EOPNOTSUPP);
+
+   if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+   dest.vport_num = dst_vport;
+   action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+   } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+   counter = mlx5_fc_create(esw->dev, true);
+   if (IS_ERR(counter))
+   return ERR_CAST(counter);
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+   dest.counter = counter;
+   }
+
+   misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, 
misc_parameters);
+   MLX5_SET(fte_match_set_misc, misc, source_port, src_vport);
+
+   misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 
misc_parameters);
+   MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+   spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS;
+
+   rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb,
+ spec, action, 0, &dest);
+
+   if (IS_ERR(rule))
+   mlx5_fc_destroy(esw->dev, counter);
+
+   return rule;
+}
+
 static struct mlx5_flow_rule *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 
sqn)
 {
-- 
2.8.0



[PATCH net-next V2 03/10] net/mlx5e: Offload TC flow counters only when supported

2016-07-14 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Currently, the code that programs the flow actions into the firmware
doesn't check if it was actually asked to offload the statistics; fix that.

Fixes: aad7e08d39bd ('net/mlx5e: Hardware offloaded flower filter statistics 
support')
Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3261e8b..cd58fc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -62,7 +62,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct 
mlx5e_priv *priv,
if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = priv->fs.vlan.ft.t;
-   } else {
+   } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(dev, true);
if (IS_ERR(counter))
return ERR_CAST(counter);
-- 
2.8.0



[PATCH net-next V2 07/10] net/switchdev: Export the same parent ID service function

2016-07-14 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

This helper serves to tell whether two switchdev port netdevices belong to the
same HW ASIC, e.g. to figure out if forwarding offload is possible between them.
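
A minimal usage sketch (illustration only; can_offload_fwd is a made-up name and
the surrounding driver context is assumed):

#include <linux/errno.h>
#include <linux/netdevice.h>
#include <net/switchdev.h>

static int can_offload_fwd(struct net_device *in_dev, struct net_device *out_dev)
{
    /* forwarding can only be offloaded between ports of the same switch ASIC */
    if (!switchdev_port_same_parent_id(in_dev, out_dev))
        return -EOPNOTSUPP;
    return 0;
}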

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 include/net/switchdev.h   | 8 
 net/switchdev/switchdev.c | 5 +++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 985619a..9023e3e 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -227,6 +227,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
 struct net_device *group_dev,
 bool joining);
 
+bool switchdev_port_same_parent_id(struct net_device *a,
+  struct net_device *b);
 #else
 
 static inline void switchdev_deferred_process(void)
@@ -351,6 +353,12 @@ static inline void switchdev_port_fwd_mark_set(struct 
net_device *dev,
 {
 }
 
+static inline bool switchdev_port_same_parent_id(struct net_device *a,
+struct net_device *b)
+{
+   return false;
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 59658b2..a5fc9dd 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1286,8 +1286,8 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
 }
 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
 
-static bool switchdev_port_same_parent_id(struct net_device *a,
- struct net_device *b)
+bool switchdev_port_same_parent_id(struct net_device *a,
+  struct net_device *b)
 {
struct switchdev_attr a_attr = {
.orig_dev = a,
@@ -1323,6 +1323,7 @@ static u32 switchdev_port_fwd_mark_get(struct net_device 
*dev,
 
return dev->ifindex;
 }
+EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
 
 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
  u32 old_mark, u32 *reset_mark)
-- 
2.8.0



[PATCH net-next V2 08/10] net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads

2016-07-14 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add the setup code that parses the TC actions needed to support offloading drop
and mirred/redirect for the SRIOV e-switch. We can redirect between two devices
only if they belong to the same HW switch; we compare the switchdev HW ID
attribute to enforce that.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 53 +
 1 file changed, 53 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 57b76f7..9a66441 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -37,8 +37,11 @@
 #include 
 #include 
 #include 
+#include <net/switchdev.h>
+#include <net/tc_act/tc_mirred.h>
 #include "en.h"
 #include "en_tc.h"
+#include "eswitch.h"
 
 struct mlx5e_tc_flow {
struct rhash_head   node;
@@ -339,6 +342,56 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, 
struct tcf_exts *exts,
return 0;
 }
 
+static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+   u32 *action, u32 *dest_vport)
+{
+   const struct tc_action *a;
+
+   if (tc_no_actions(exts))
+   return -EINVAL;
+
+   *action = 0;
+
+   tc_for_each_action(a, exts) {
+   /* Only support a single action per rule */
+   if (*action)
+   return -EINVAL;
+
+   if (is_tcf_gact_shot(a)) {
+   *action = MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+   continue;
+   }
+
+   if (is_tcf_mirred_redirect(a)) {
+   int ifindex = tcf_mirred_ifindex(a);
+   struct net_device *out_dev;
+   struct mlx5e_priv *out_priv;
+   struct mlx5_eswitch_rep *out_rep;
+
+   out_dev = __dev_get_by_index(dev_net(priv->netdev), 
ifindex);
+
+   if (!switchdev_port_same_parent_id(priv->netdev, 
out_dev)) {
+   pr_err("devices %s %s not on same switch HW, 
can't offload forwarding\n",
+  priv->netdev->name, out_dev->name);
+   return -EINVAL;
+   }
+
+   out_priv = netdev_priv(out_dev);
+   out_rep  = out_priv->ppriv;
+   if (out_rep->vport == 0)
+   *dest_vport = FDB_UPLINK_VPORT;
+   else
+   *dest_vport = out_rep->vport;
+   *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+   continue;
+   }
+
+   return -EINVAL;
+   }
+   return 0;
+}
+
 int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
   struct tc_cls_flower_offload *f)
 {
-- 
2.8.0



[PATCH net-next V2 04/10] net/mlx5: E-Switch, Use two priorities for SRIOV offloads mode

2016-07-14 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

In the offloads mode, some slow path rules are added by the driver (e.g.
send-to-vport), while offloaded rules are to be added from upper layers.

The slow path rules have lower priority and we don't want matching on
offloaded rules to suffer from extra steering hops related to the slow
path rules.

We use two priorities, one for offloaded rules (fast path), and one for
the control rules (slow path). To allow for that, we enable two priorities
for the FDB namespace in the FS core code.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  1 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 34 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 22 +-
 3 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 7b45e6a..035e536 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -145,6 +145,7 @@ struct mlx5_eswitch_fdb {
} legacy;
 
struct offloads_fdb {
+   struct mlx5_flow_table *fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_rule  *miss_rule;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 1842dfb..27122c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -38,6 +38,11 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 
+enum {
+   FDB_FAST_PATH = 0,
+   FDB_SLOW_PATH
+};
+
 static struct mlx5_flow_rule *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 
sqn)
 {
@@ -149,7 +154,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport_num = 0;
 
-   flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
+   flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
   0, &dest);
if (IS_ERR(flow_rule)) {
@@ -165,6 +170,8 @@ out:
 }
 
 #define MAX_PF_SQ 256
+#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */
+#define ESW_OFFLOADS_NUM_GROUPS  4
 
 static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 {
@@ -190,15 +197,25 @@ static int esw_create_offloads_fdb_table(struct 
mlx5_eswitch *esw, int nvports)
esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n",
  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 
-   table_size = nvports + MAX_PF_SQ + 1;
-   fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0);
+   fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
+ ESW_OFFLOADS_NUM_ENTRIES,
+ ESW_OFFLOADS_NUM_GROUPS, 0);
if (IS_ERR(fdb)) {
err = PTR_ERR(fdb);
-   esw_warn(dev, "Failed to create FDB Table err %d\n", err);
-   goto fdb_err;
+   esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", 
err);
+   goto fast_fdb_err;
}
esw->fdb_table.fdb = fdb;
 
+   table_size = nvports + MAX_PF_SQ + 1;
+   fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0);
+   if (IS_ERR(fdb)) {
+   err = PTR_ERR(fdb);
+   esw_warn(dev, "Failed to create slow path FDB Table err %d\n", 
err);
+   goto slow_fdb_err;
+   }
+   esw->fdb_table.offloads.fdb = fdb;
+
/* create send-to-vport group */
memset(flow_group_in, 0, inlen);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -247,8 +264,10 @@ miss_rule_err:
 miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 send_vport_err:
-   mlx5_destroy_flow_table(fdb);
-fdb_err:
+   mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb);
+slow_fdb_err:
+   mlx5_destroy_flow_table(esw->fdb_table.fdb);
+fast_fdb_err:
 ns_err:
kvfree(flow_group_in);
return err;
@@ -264,6 +283,7 @@ static void esw_destroy_offloads_fdb_table(struct 
mlx5_eswitch *esw)
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
+   mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb);
mlx5_dest

[PATCH net-next V2 10/10] net/mlx5e: Add TC offload support for the VF representors netdevice

2016-07-14 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

The VF representors support only TC filter/action offloads
(not mqprio) and this is enabled for them by default.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 35 +++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index fdaf2fa..1c7d8b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include <net/pkt_cls.h>
 
 #include "eswitch.h"
 #include "en.h"
@@ -222,6 +223,29 @@ static int mlx5e_rep_get_phys_port_name(struct net_device 
*dev,
return 0;
 }
 
+static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
+ __be16 proto, struct tc_to_netdev *tc)
+{
+   struct mlx5e_priv *priv = netdev_priv(dev);
+
+   if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+   return -EOPNOTSUPP;
+
+   switch (tc->type) {
+   case TC_SETUP_CLSFLOWER:
+   switch (tc->cls_flower->command) {
+   case TC_CLSFLOWER_REPLACE:
+   return mlx5e_configure_flower(priv, proto, 
tc->cls_flower);
+   case TC_CLSFLOWER_DESTROY:
+   return mlx5e_delete_flower(priv, tc->cls_flower);
+   case TC_CLSFLOWER_STATS:
+   return mlx5e_stats_flower(priv, tc->cls_flower);
+   }
+   default:
+   return -EOPNOTSUPP;
+   }
+}
+
 static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
.switchdev_port_attr_get= mlx5e_attr_get,
 };
@@ -231,6 +255,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
.ndo_stop= mlx5e_close,
.ndo_start_xmit  = mlx5e_xmit,
.ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
+   .ndo_setup_tc= mlx5e_rep_ndo_setup_tc,
.ndo_get_stats64 = mlx5e_get_stats,
 };
 
@@ -284,7 +309,8 @@ static void mlx5e_build_rep_netdev(struct net_device 
*netdev)
netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
 #endif
 
-   netdev->features |= NETIF_F_VLAN_CHALLENGED;
+   netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC;
+   netdev->hw_features  |= NETIF_F_HW_TC;
 
eth_hw_addr_random(netdev);
 }
@@ -328,8 +354,14 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
}
rep->vport_rx_rule = flow_rule;
 
+   err = mlx5e_tc_init(priv);
+   if (err)
+   goto err_del_flow_rule;
+
return 0;
 
+err_del_flow_rule:
+   mlx5_del_flow_rule(rep->vport_rx_rule);
 err_destroy_direct_tirs:
mlx5e_destroy_direct_tirs(priv);
 err_destroy_direct_rqts:
@@ -343,6 +375,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
struct mlx5_eswitch_rep *rep = priv->ppriv;
int i;
 
+   mlx5e_tc_cleanup(priv);
mlx5_del_flow_rule(rep->vport_rx_rule);
mlx5e_destroy_direct_tirs(priv);
for (i = 0; i < priv->params.num_channels; i++)
-- 
2.8.0



[PATCH net-next V2 09/10] net/mlx5e: Add TC HW support for FDB (SRIOV e-switch) offloads

2016-07-14 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Enhance the TC offload code such that when the eswitch exists and its mode is
SRIOV offloads, we do the TC action parsing and setup targeted for the eswitch.
Next, we add the offloaded flow to the HW e-switch (FDB).

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c |  5 
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c  | 35 
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 5ef02f0..fdaf2fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -36,6 +36,7 @@
 
 #include "eswitch.h"
 #include "en.h"
+#include "en_tc.h"
 
 static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
 
@@ -201,6 +202,10 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
 
if (test_bit(MLX5E_STATE_OPENED, >state))
mlx5e_remove_sqs_fwd_rules(priv);
+
+   /* clean (and re-init) existing uplink offloaded TC rules */
+   mlx5e_tc_cleanup(priv);
+   mlx5e_tc_init(priv);
 }
 
 static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9a66441..0f19b01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -112,6 +112,22 @@ err_create_ft:
return rule;
 }
 
+static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+   struct mlx5_flow_spec *spec,
+   u32 action, u32 dst_vport)
+{
+   struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+   struct mlx5_eswitch_rep *rep = priv->ppriv;
+   u32 src_vport;
+
+   if (rep->vport) /* set source vport for the flow */
+   src_vport = rep->vport;
+   else
+   src_vport = FDB_UPLINK_VPORT;
+
+   return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, 
dst_vport);
+}
+
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
  struct mlx5_flow_rule *rule)
 {
@@ -397,11 +413,11 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, 
__be16 protocol,
 {
struct mlx5e_tc_table *tc = &priv->fs.tc;
int err = 0;
-   u32 flow_tag;
-   u32 action;
+   u32 flow_tag, action, dest_vport = 0;
struct mlx5e_tc_flow *flow;
struct mlx5_flow_spec *spec;
struct mlx5_flow_rule *old = NULL;
+   struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
  tc->ht_params);
@@ -422,11 +438,18 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, 
__be16 protocol,
if (err < 0)
goto err_free;
 
-   err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
-   if (err < 0)
-   goto err_free;
+   if (esw && esw->mode == SRIOV_OFFLOADS) {
+   err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport);
+   if (err < 0)
+   goto err_free;
+   flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport);
+   } else {
+   err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
+   if (err < 0)
+   goto err_free;
+   flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
+   }
 
-   flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
if (IS_ERR(flow->rule)) {
err = PTR_ERR(flow->rule);
goto err_free;
-- 
2.8.0



[PATCH net-next V2 06/10] net/mlx5e: Adjustments in the TC offload code towards reuse for SRIOV

2016-07-14 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Towards reusing the TC offload code for an SRIOV use-case, change some of the
helper functions to have _nic in their names so it's clear what is NIC-specific
and what is generic. Also group the NIC related helpers together so we can
easily branch per use-case in a downstream patch.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 32 -
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index cd58fc8..57b76f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -49,9 +49,9 @@ struct mlx5e_tc_flow {
 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 
-static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
-   struct mlx5_flow_spec *spec,
-   u32 action, u32 flow_tag)
+static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+   struct mlx5_flow_spec *spec,
+   u32 action, u32 flow_tag)
 {
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_flow_destination dest = { 0 };
@@ -120,7 +120,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 
mlx5_fc_destroy(priv->mdev, counter);
 
-   if (!mlx5e_tc_num_filters(priv)) {
+   if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
mlx5_destroy_flow_table(priv->fs.tc.t);
priv->fs.tc.t = NULL;
}
@@ -295,8 +295,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct 
mlx5_flow_spec *spec
return 0;
 }
 
-static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-   u32 *action, u32 *flow_tag)
+static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+   u32 *action, u32 *flow_tag)
 {
const struct tc_action *a;
 
@@ -369,28 +369,28 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, 
__be16 protocol,
if (err < 0)
goto err_free;
 
-   err = parse_tc_actions(priv, f->exts, &action, &flow_tag);
+   err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
if (err < 0)
goto err_free;
 
-   err = rhashtable_insert_fast(&tc->ht, &flow->node,
-tc->ht_params);
-   if (err)
-   goto err_free;
-
-   flow->rule = mlx5e_tc_add_flow(priv, spec, action, flow_tag);
+   flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
if (IS_ERR(flow->rule)) {
err = PTR_ERR(flow->rule);
-   goto err_hash_del;
+   goto err_free;
}
 
+   err = rhashtable_insert_fast(&tc->ht, &flow->node,
+tc->ht_params);
+   if (err)
+   goto err_del_rule;
+
if (old)
mlx5e_tc_del_flow(priv, old);
 
goto out;
 
-err_hash_del:
-   rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
+err_del_rule:
+   mlx5_del_flow_rule(flow->rule);
 
 err_free:
if (!old)
-- 
2.8.0



[PATCH net-next V2 00/10] Mellanox 100G mlx5 Bulk flow statistics and SRIOV TC offloads

2016-07-14 Thread Saeed Mahameed
Hi Dave,

This series from Amir and Or deals with two enhancements for the mlx5 TC
offloads.

The first two patches add bulk reading of flow counters. A few bulk counter
queries are used instead of issuing thousands of firmware commands per second
to get the statistics of all the flows set into HW.

The next patches add TC based SRIOV offloading to mlx5, as a follow up to the
e-switch offloads mode and the VF representors. When the e-switch is set to the
(new) "offloads" mode, we can now offload TC/flower drop and forward rules; the
forward action we offload is TC mirred/redirect.

The above is done by having the VF representor netdevices export the setup_tc
ndo, from where we reuse and enhance the existing mlx5 TC offloads sub-module,
which now works for both the NIC and the SRIOV cases.

The series is applied on top of b38a75d2d324 ('mlxsw: core: Trace EMAD messages')
and it has no merge issues with the ongoing net submission ('mlx5 tx timeout
watchdog fixes').

V2:
- Fixed compilation warning.

Thanks,
Saeed.

Amir Vadai (2):
  net/mlx5: Store counters in rbtree instead of list
  net/mlx5: Introduce bulk reading of flow counters

Or Gerlitz (8):
  net/mlx5e: Offload TC flow counters only when supported
  net/mlx5: E-Switch, Use two priorities for SRIOV offloads mode
  net/mlx5: E-Switch, Add API to configure rules for the offloaded mode
  net/mlx5e: Adjustments in the TC offload code towards reuse for SRIOV
  net/switchdev: Export the same parent ID service function
  net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV
offloads
  net/mlx5e: Add TC HW support for FDB (SRIOV e-switch) offloads
  net/mlx5e: Add TC offload support for the VF representors netdevice

 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  40 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 116 ++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   7 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  77 ++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   |  67 ++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   |  12 ++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  22 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |   1 +
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 141 -
 include/linux/mlx5/driver.h|   2 +-
 include/linux/mlx5/mlx5_ifc.h  |   8 +-
 include/net/switchdev.h|   8 ++
 net/switchdev/switchdev.c  |   5 +-
 13 files changed, 436 insertions(+), 70 deletions(-)

-- 
2.8.0



Re: mlx5 PF and VF Vlan Restrictions

2016-07-14 Thread Saeed Mahameed
On Thu, Jul 14, 2016 at 11:27 AM, domingo montoya wrote:
> Hello,
>
> Can we have VLAN restrictions at both PF and VF level in mlx5 CX4 driver?
>
> For a particular VF, I would like to restrict VLANs from the PF driver
>
> For e.g, Let's say there is VF0. I would like to restrict the allowed
> vlans from the VF0 to be one of 10,20,30.
>
> I would like to do this enforcement at the PF driver.
>
> How do i do this?
>

In the mlx5 driver you will need to configure both the egress and ingress ACLs
with the allowed vlan list,

same as we did for VST but without the HW vlan insertion/stripping,
i.e. don't configure the HW (don't call modify_esw_vport_cvlan with a non-zero
vlan and qos).

Today for VST mode we allow one vlan:
- VST vlan on VF TX is enforced via the modify_esw_vport_cvlan command, which
inserts the enforced vlan into all VF TX packets.
- VST vlan on VF RX is enforced via an ingress ACL (allowed vlan rule).

You need:
- VGT allowed-vlan-list ACL rules in both the ingress and egress ACLs, and to
drop all other traffic.


>
> So if the VF tries to configure any vlan other than 10,20,30, it should fail.
>
> Also how to communicate the vlans that are allowed to the vf from the PF.

You don't need to communicate them; the VF should initiate VGT vlans
and try to work in a best-effort manner. If the PF allows it, it will
work;
if not, traffic won't pass.

>
>
> Is this feature already implemented in the mlx5 ?
>

No, the kernel/userspace API for the PF admin to configure an allowed vlan list is missing.

>
> If not, can anyone please help me understand how to do that.
>

Add a new command to iproute to configure the allowed vlan list per VF; it
should be mutually exclusive with VST.
Add a new ndo to configure the vlan list for a VF in the device driver; better
if we extend the existing ndo (set_vf_vlan).
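
Purely hypothetical sketch of the kind of ndo extension being suggested (none of
these names exist in the kernel; they are made up here for illustration only):

#include <linux/types.h>

/* hypothetical request: an allowed-vlan list for one VF */
struct vf_vlan_list_example {
    int vf;
    int count;
    u16 vlans[32];    /* e.g. 10, 20, 30 */
};

/* hypothetical addition to struct net_device_ops:
 *    int (*ndo_set_vf_vlan_list)(struct net_device *dev, int vf,
 *                                u16 *vlans, int count);
 */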

Thanks,
Saeed.


[PATCH net V2] net/mlx5e: Fix del vxlan port command buffer memset

2016-07-20 Thread Saeed Mahameed
memset the command buffers rather than the pointers to them.

Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling")
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---

V2:
 - Improved log message.

 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c 
b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 05de772..e25a73ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -72,8 +72,8 @@ static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev 
*mdev, u16 port)
u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)];
u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)];
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
+   memset(in, 0, sizeof(in));
+   memset(out, 0, sizeof(out));
 
MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
-- 
2.8.0



[PATCH net-next 0/2] Mellanox 100G mlx5 minimum inline header mode

2016-07-20 Thread Saeed Mahameed
Hi Dave,

This small series from Hadar adds support for querying the minimum inline header
mode in the mlx5e NIC driver.

Today on TX the driver copies into the HW descriptor only up to the L2 header,
which is the default required mode and sufficient for today's needs.

The header in the HW descriptor is used for the HW loopback steering decision;
without it, packets will go directly to the wire with no questions asked.

For TX loopback steering according to L2/L3/L4 headers, ConnectX-4 requires the
corresponding headers to be copied into the send queue (SQ) WQE HW descriptor so
it can decide whether to loop the packet back or forward it to the wire.

For legacy E-Switch mode only an L2 header copy is required.
For advanced steering (E-Switch offloads) more header layers may need to be
copied; the required mode is advertised by FW to each VF and PF according to the
corresponding E-Switch configuration.
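
As a rough illustration (assuming a plain untagged TCP/IPv4 frame with no
options; this is arithmetic only, not driver code), the number of header bytes
that end up copied inline per minimum inline mode:

#include <stdio.h>

#define ETH_HLEN   14   /* Ethernet header */
#define IPV4_HLEN  20   /* IPv4 header, no options */
#define TCP_HLEN   20   /* TCP header, no options */

int main(void)
{
    printf("L2 mode:      %d bytes\n", ETH_HLEN);                        /* 14 */
    printf("IP mode:      %d bytes\n", ETH_HLEN + IPV4_HLEN);            /* 34 */
    printf("TCP/UDP mode: %d bytes\n", ETH_HLEN + IPV4_HLEN + TCP_HLEN); /* 54 */
    return 0;
}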

Thanks,
Saeed.

Hadar Hen Zion (2):
  net/mlx5e: Check the minimum inline header mode before xmit
  net/mlx5e: Query minimum required header copy during xmit

 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  8 
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   | 49 +--
 drivers/net/ethernet/mellanox/mlx5/core/vport.c   | 18 +
 include/linux/mlx5/device.h   |  7 
 include/linux/mlx5/mlx5_ifc.h | 10 +++--
 include/linux/mlx5/vport.h|  2 +
 7 files changed, 111 insertions(+), 7 deletions(-)

-- 
2.8.0



[PATCH net-next 2/2] net/mlx5e: Query minimum required header copy during xmit

2016-07-20 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Add support for querying the minimum inline mode from the Firmware.
It is required for correct TX steering according to L3/L4 packet
headers.

Each send queue (SQ) has an inline mode that defines the minimal required
headers that need to be copied into the SQ WQE.
The driver asks the Firmware for the wqe_inline_mode device capability
value.  In case the device capability is defined as "vport context", the
driver must check the reported min inline mode from the vport context
before creating its SQs.

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  7 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++
 drivers/net/ethernet/mellanox/mlx5/core/vport.c   | 18 +
 include/linux/mlx5/mlx5_ifc.h | 10 +++---
 include/linux/mlx5/vport.h|  2 ++
 5 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 2c20c7b..1b495ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -129,6 +129,12 @@ static inline int mlx5_max_log_rq_size(int wq_type)
}
 }
 
+enum {
+   MLX5E_INLINE_MODE_L2,
+   MLX5E_INLINE_MODE_VPORT_CONTEXT,
+   MLX5_INLINE_MODE_NOT_REQUIRED,
+};
+
 struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_eth_seg  eth;
@@ -188,6 +194,7 @@ struct mlx5e_params {
bool lro_en;
u32 lro_wqe_sz;
u16 tx_max_inline;
+   u8  tx_min_inline_mode;
u8  rss_hfunc;
u8  toeplitz_hash_key[40];
u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 611ab55..ca7b1e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -56,6 +56,7 @@ struct mlx5e_sq_param {
u32sqc[MLX5_ST_SZ_DW(sqc)];
struct mlx5_wq_param   wq;
u16max_inline;
+   u8 min_inline_mode;
bool   icosq;
 };
 
@@ -649,6 +650,9 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
}
sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
sq->max_inline  = param->max_inline;
+   sq->min_inline_mode =
+   MLX5_CAP_ETH(mdev, wqe_inline_mode) == 
MLX5E_INLINE_MODE_VPORT_CONTEXT ?
+   param->min_inline_mode : 0;
 
err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu));
if (err)
@@ -731,6 +735,7 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct 
mlx5e_sq_param *param)
 
MLX5_SET(sqc,  sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]);
MLX5_SET(sqc,  sqc, cqn,sq->cq.mcq.cqn);
+   MLX5_SET(sqc,  sqc, min_wqe_inline_mode, sq->min_inline_mode);
MLX5_SET(sqc,  sqc, state,  MLX5_SQC_STATE_RST);
MLX5_SET(sqc,  sqc, tis_lst_sz, param->icosq ? 0 : 1);
MLX5_SET(sqc,  sqc, flush_in_error_en,  1);
@@ -1343,6 +1348,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
 
param->max_inline = priv->params.tx_max_inline;
+   param->min_inline_mode = priv->params.tx_min_inline_mode;
 }
 
 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -2967,6 +2973,23 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params 
*params, u8 cq_period_mode)
MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
 }
 
+static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev,
+  u8 *min_inline_mode)
+{
+   switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
+   case MLX5E_INLINE_MODE_L2:
+   *min_inline_mode = MLX5_INLINE_MODE_L2;
+   break;
+   case MLX5E_INLINE_MODE_VPORT_CONTEXT:
+   mlx5_query_nic_vport_min_inline(mdev,
+   min_inline_mode);
+   break;
+   case MLX5_INLINE_MODE_NOT_REQUIRED:
+   *min_inline_mode = MLX5_INLINE_MODE_NONE;
+   break;
+   }
+}
+
 static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
struct net_device *netdev,
const struct mlx5e_profile *profile,
@@ -3032,6 +3055,7 @@ static void mlx5e_build_nic_netdev_priv(struct 
mlx5_core_dev *mdev,
priv->params.tx_cq_moderation.pkts =
MLX5E_PARAMS_DEFAULT_TX_CQ_MOD

[PATCH net-next 1/2] net/mlx5e: Check the minimum inline header mode before xmit

2016-07-20 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Each send queue (SQ) has an inline mode that defines the minimal required
inline headers in the SQ WQE.
Before sending each packet, check that the minimum required headers are
copied into the WQE.

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h|  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 49 +++--
 include/linux/mlx5/device.h |  7 
 3 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 4cbd452..2c20c7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -398,6 +398,7 @@ struct mlx5e_sq {
u32sqn;
u16bf_buf_size;
u16max_inline;
+   u8 min_inline_mode;
u16edge;
struct device *pdev;
struct mlx5e_tstamp   *tstamp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 5740b46..e073bf59 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -128,6 +128,50 @@ u16 mlx5e_select_queue(struct net_device *dev, struct 
sk_buff *skb,
return priv->channeltc_to_txq_map[channel_ix][up];
 }
 
+static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
+{
+#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+
+   return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
+}
+
+static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
+{
+   struct flow_keys keys;
+
+   if (skb_transport_header_was_set(skb))
+   return skb_transport_offset(skb);
else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
+   return keys.control.thoff;
+   else
+   return mlx5e_skb_l2_header_offset(skb);
+}
+
+static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
+struct sk_buff *skb)
+{
+   int hlen;
+
+   switch (mode) {
+   case MLX5_INLINE_MODE_TCP_UDP:
+   hlen = eth_get_headlen(skb->data, skb_headlen(skb));
+   if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
+   hlen += VLAN_HLEN;
+   return hlen;
+   case MLX5_INLINE_MODE_IP:
+   /* When transport header is set to zero, it means no transport
+* header. When transport header is set to 0xff's, it means
+* transport header wasn't set.
+*/
+   if (skb_transport_offset(skb))
+   return mlx5e_skb_l3_header_offset(skb);
+   /* fall through */
+   case MLX5_INLINE_MODE_L2:
+   default:
+   return mlx5e_skb_l2_header_offset(skb);
+   }
+}
+
 static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
struct sk_buff *skb, bool bf)
 {
@@ -135,8 +179,6 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq 
*sq,
 * headers and occur before the data gather.
 * Therefore these headers must be copied into the WQE
 */
-#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-
if (bf) {
u16 ihs = skb_headlen(skb);
 
@@ -146,8 +188,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq 
*sq,
if (ihs <= sq->max_inline)
return skb_headlen(skb);
}
-
-   return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
+   return mlx5e_calc_min_inline(sq->min_inline_mode, skb);
 }
 
 static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index e0a3ed7..0b6d15c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -129,6 +129,13 @@ __mlx5_mask(typ, fld))
tmp;  \
})
 
+enum mlx5_inline_modes {
+   MLX5_INLINE_MODE_NONE,
+   MLX5_INLINE_MODE_L2,
+   MLX5_INLINE_MODE_IP,
+   MLX5_INLINE_MODE_TCP_UDP,
+};
+
 enum {
MLX5_MAX_COMMANDS   = 32,
MLX5_CMD_DATA_BLOCK_SIZE= 512,
-- 
2.8.0



Re: [PATCH net V2] net/mlx5e: Fix del vxlan port command buffer memset

2016-07-21 Thread Saeed Mahameed
On Thu, Jul 21, 2016 at 3:53 AM, Alexei Starovoitov
<alexei.starovoi...@gmail.com> wrote:
> On Thu, Jul 21, 2016 at 12:39:53AM +0300, Saeed Mahameed wrote:
>> memset the command buffers rather than the pointers to them.
>
> that is still wrong commit log.
> This patch makes zero difference to generated code.
> '&in' is the same as 'in'
>

Who said they are not? There was a mistake in the original log
message and it was fixed here.
The patch was made to make the code consistent with other places in
the code, and I don't see anything wrong with the log message.
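
As a small stand-alone illustration of Alexei's point (assumed example, not the
driver code): for a local array, '&in' and 'in' refer to the same address and
sizeof(in) is the full array size, so both spellings zero the whole buffer; the
patch only switches to the conventional spelling.

#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned int in[8];

    memset(&in, 0, sizeof(in));    /* works: &in == (void *)in, sizeof(in) == 32 */
    memset(in, 0, sizeof(in));     /* same effect, conventional style */
    printf("%zu bytes cleared either way\n", sizeof(in));
    return 0;
}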


[PATCH net V2] net/bonding: Enforce active-backup policy for IPoIB bonds

2016-07-21 Thread Saeed Mahameed
From: Mark Bloch <ma...@mellanox.com>

When using an IPoIB bond, currently only active-backup mode is a valid
use case, and this commit enforces it.

From the time commit 2ab82852a270 ("net/bonding: Enable bonding to enslave
netdevices not supporting set_mac_address()") was introduced until
4.7-rc1, IPoIB didn't support the set_mac_address ndo, and hence the
fail over mac policy always applied to IPoIB bonds.

With the introduction of commit 492a7e67ff83 ("IB/IPoIB: Allow setting
the device address"), that no longer holds and IPoIB bonds are practically
broken as of that commit. To fix it, let's fall back to the fail over mac
policy if the device doesn't support the ndo OR it is an IPoIB device.

As a by-product, this commit also prevents a stack corruption which
occurred when trying to copy 20 bytes (IPoIB) device address
to a sockaddr struct that has only 16 bytes of storage.
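
A stand-alone illustration of the size mismatch (assumed example; the struct
below mirrors the layout of struct sockaddr, and 20 is INFINIBAND_ALEN):

#include <stdio.h>

#define INFINIBAND_ALEN 20              /* IPoIB hardware address length */

struct sockaddr_like {                  /* same layout as struct sockaddr */
    unsigned short sa_family;
    char sa_data[14];
};

int main(void)
{
    /* copying a 20-byte IPoIB address into sa_data overruns it by 6 bytes */
    printf("sockaddr: %zu bytes, sa_data: %zu bytes, IPoIB addr: %d bytes\n",
           sizeof(struct sockaddr_like),
           sizeof(((struct sockaddr_like *)0)->sa_data),
           INFINIBAND_ALEN);
    return 0;
}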

Signed-off-by: Mark Bloch <ma...@mellanox.com>
Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---

Changes from v0:
 - Set res to -EOPNOTSUPP before jumping to err_undo_flags.

 drivers/net/bonding/bond_main.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a2afa3b..4d79819 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1422,7 +1422,16 @@ int bond_enslave(struct net_device *bond_dev, struct 
net_device *slave_dev)
return -EINVAL;
}
 
-   if (slave_ops->ndo_set_mac_address == NULL) {
+   if (slave_dev->type == ARPHRD_INFINIBAND &&
+   BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
+   netdev_warn(bond_dev, "Type (%d) supports only active-backup 
mode\n",
+   slave_dev->type);
+   res = -EOPNOTSUPP;
+   goto err_undo_flags;
+   }
+
+   if (!slave_ops->ndo_set_mac_address ||
+   slave_dev->type == ARPHRD_INFINIBAND) {
netdev_warn(bond_dev, "The slave device specified does not 
support setting the MAC address\n");
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
-- 
2.8.0



Re: [PATCH net-next 2/2] net/mlx5e: Query minimum required header copy during xmit

2016-07-21 Thread Saeed Mahameed
On Thu, Jul 21, 2016 at 8:15 AM, Alexei Starovoitov
<alexei.starovoi...@gmail.com> wrote:
> On Thu, Jul 21, 2016 at 01:20:02AM +0300, Saeed Mahameed wrote:
>> From: Hadar Hen Zion <had...@mellanox.com>
>>
>> Add support for query the minimum inline mode from the Firmware.
>> It is required for correct TX steering according to L3/L4 packet
>> headers.
>>
>> Each send queue (SQ) has inline mode that defines the minimal required
>> headers that needs to be copied into the SQ WQE.
>> The driver asks the Firmware for the wqe_inline_mode device capability
>> value.  In case the device capability defined as "vport context" the
>> driver must check the reported min inline mode from the vport context
>> before creating its SQs.
>>
>> Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
>> Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
> ...
>> + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
>> + u32 *out;
>> +
>> + out = mlx5_vzalloc(outlen);
>> + if (!out)
>> + return;
>
> Just discovered this...
> outlen is a small constant here, yet you want to try to vmalloc it?
> What is the point?
> There are 67 places in mlx5 where failed kmalloc is retried with
> vmalloc... was that path ever tested?

The point is that there are a lot of places in the code that want to
allocate huge commands, and mlx5_vzalloc is a nice black box that
provides the method to allocate such huge chunks of memory.

Now sometimes people tend to reuse the same pieces of code for consistency.

I don't see any harm in doing that.
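
For context, a minimal sketch of the allocation pattern being discussed (the
usual kernel kmalloc-then-vmalloc fallback idiom; this is an assumption about
the general shape of mlx5_vzalloc, not a copy of it):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

static void *cmd_buf_zalloc(size_t size)
{
    void *buf = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);

    if (!buf)
        buf = vzalloc(size);    /* fall back for large or fragmented allocations */
    return buf;
}

static void cmd_buf_free(void *buf)
{
    kvfree(buf);    /* handles both kmalloc'ed and vmalloc'ed memory */
}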


Re: [PATCH net-next 2/2] net/mlx5e: Query minimum required header copy during xmit

2016-07-21 Thread Saeed Mahameed
On Thu, Jul 21, 2016 at 7:15 PM, David Miller <da...@davemloft.net> wrote:
> From: Saeed Mahameed <sae...@dev.mellanox.co.il>
> Date: Thu, 21 Jul 2016 11:22:32 +0300
>
>> On Thu, Jul 21, 2016 at 8:15 AM, Alexei Starovoitov
>> <alexei.starovoi...@gmail.com> wrote:
>>> On Thu, Jul 21, 2016 at 01:20:02AM +0300, Saeed Mahameed wrote:
>>>> From: Hadar Hen Zion <had...@mellanox.com>
>>>>
>>>> Add support for query the minimum inline mode from the Firmware.
>>>> It is required for correct TX steering according to L3/L4 packet
>>>> headers.
>>>>
>>>> Each send queue (SQ) has inline mode that defines the minimal required
>>>> headers that needs to be copied into the SQ WQE.
>>>> The driver asks the Firmware for the wqe_inline_mode device capability
>>>> value.  In case the device capability defined as "vport context" the
>>>> driver must check the reported min inline mode from the vport context
>>>> before creating its SQs.
>>>>
>>>> Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
>>>> Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
>>> ...
>>>> + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
>>>> + u32 *out;
>>>> +
>>>> + out = mlx5_vzalloc(outlen);
>>>> + if (!out)
>>>> + return;
>>>
>>> Just discovered this...
>>> outlen is a small constant here, yet you want to try to vmalloc it?
>>> What is the point?
>>> There are 67 places in mlx5 where failed kmalloc is retried with
>>> vmalloc... was that path ever tested?
>>
>> The point is that there are a lot of places in the code that want to
>> allocate huge commands and mlx5_vzalloc is a nice black box that
>> provides the method to allocate such huge chunks of memory.
>
> If it's a "black box" then don't mention that it uses vmalloc in the
> function name.

Right, mlx5_zalloc would make a better name, I will change this in a
future refactoring patch.
But how is this related to this series?

I am already working on a series for next kernel release to improve
mlx5 command interface, command layout allocation/filling and
execution.
I will be glad to listen to any suggestion you have regarding using
mlx5_vzalloc for small buffers allocation ..

Thanks,
Saeed.


[PATCH net] net/bonding: Enforce active-backup policy for IPoIB bonds

2016-07-20 Thread Saeed Mahameed
From: Mark Bloch <ma...@mellanox.com>

When using an IPoIB bond, currently only active-backup mode is a valid
use case, and this commit enforces it.

From the time commit 2ab82852a270 ("net/bonding: Enable bonding to enslave
netdevices not supporting set_mac_address()") was introduced until
4.7-rc1, IPoIB didn't support the set_mac_address ndo, and hence
the fail over mac policy always applied to IPoIB bonds.

With the introduction of commit 492a7e67ff83 ("IB/IPoIB: Allow setting
the device address"), that no longer holds and IPoIB bonds are practically
broken as of that commit. To fix it, let's fall back to the fail over mac
policy if the device doesn't support the ndo OR it is an IPoIB device.

As a by-product, this commit also prevents a stack corruption which
occurred when trying to copy 20 bytes (IPoIB) device address
to a sockaddr struct that has only 16 bytes of storage.

Signed-off-by: Mark Bloch <ma...@mellanox.com>
Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/bonding/bond_main.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a2afa3b..ccd4003 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1422,7 +1422,15 @@ int bond_enslave(struct net_device *bond_dev, struct 
net_device *slave_dev)
return -EINVAL;
}
 
-   if (slave_ops->ndo_set_mac_address == NULL) {
+   if (slave_dev->type == ARPHRD_INFINIBAND &&
+   BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
+   netdev_warn(bond_dev, "Type (%d) supports only active-backup 
mode\n",
+   slave_dev->type);
+   goto err_undo_flags;
+   }
+
+   if (!slave_ops->ndo_set_mac_address ||
+   slave_dev->type == ARPHRD_INFINIBAND) {
netdev_warn(bond_dev, "The slave device specified does not 
support setting the MAC address\n");
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
-- 
2.8.0



[PATCH net] net/mlx5e: Fix del vxlan port command

2016-07-20 Thread Saeed Mahameed
mlx5e_vxlan_core_del_port_cmd cleared the wrong address, the address of
the pointer of the buffer rather than clearing the buffer itself.

This can lead to a serious stack corruption.

Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling")
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c 
b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 05de772..e25a73ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -72,8 +72,8 @@ static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev 
*mdev, u16 port)
u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)];
u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)];
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
+   memset(in, 0, sizeof(in));
+   memset(out, 0, sizeof(out));
 
MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
-- 
2.8.0



Re: [PATCH net] net/mlx5e: Fix del vxlan port command

2016-07-20 Thread Saeed Mahameed
On Wed, Jul 20, 2016 at 7:15 PM, Alexei Starovoitov
<alexei.starovoi...@gmail.com> wrote:
> On Wed, Jul 20, 2016 at 05:48:48PM +0300, Saeed Mahameed wrote:
>> mlx5e_vxlan_core_del_port_cmd cleared the wrong address, the address of
>> the pointer of the buffer rather than clearing the buffer itself.
>>
>> This can lead to a serious stack corruption.
>
> The change is fine, but commit log is complete nonsense.
> Nacking to make sure it's adjusted.
> We cannot have such commits in the git history.

That was my hunch too :) , will fix this.


[PATCH net 5/7] net/mlx5e: Prevent adding the same vxlan port

2016-06-27 Thread Saeed Mahameed
From: Matthew Finlay <m...@mellanox.com>

Do not allow the same vxlan udp port to be added to the device more than
once.

Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling")
Signed-off-by: Matthew Finlay <m...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c 
b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index f2fd1ef..05de772 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -105,6 +105,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
struct mlx5e_vxlan *vxlan;
int err;
 
+   if (mlx5e_vxlan_lookup_port(priv, port))
+   goto free_work;
+
if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
goto free_work;
 
-- 
2.8.0



[PATCH net 7/7] net/mlx5e: Reorganize ethtool statistics

2016-06-27 Thread Saeed Mahameed
From: Gal Pressman <g...@mellanox.com>

Categorize and reorganize the ethtool statistics counters by renaming
them to "rx_*" and "tx_*" and by removing redundant and duplicated
counters; this way they are easier to grasp and more user friendly.
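
For context, a simplified stand-alone sketch (the struct and array below
are illustrative, not the exact en_stats.h definitions) of how a
format-string based descriptor is expanded per channel, which is what the
hunks below switch to:

#include <stdio.h>

#define ETH_GSTRING_LEN 32

struct counter_desc {
	const char *format;		/* e.g. "rx%d_packets" */
};

static const struct counter_desc rq_stats_desc[] = {
	{ "rx%d_packets" },
	{ "rx%d_bytes" },
};

static int fill_channel_strings(char *data, int idx, int channel)
{
	unsigned int i;

	/* each descriptor carries its own naming format, so the caller no
	 * longer glues a "rx%d_" prefix in front of a plain name */
	for (i = 0; i < sizeof(rq_stats_desc) / sizeof(rq_stats_desc[0]); i++)
		sprintf(data + (idx++) * ETH_GSTRING_LEN,
			rq_stats_desc[i].format, channel);
	return idx;
}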

Signed-off-by: Gal Pressman <g...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  30 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  27 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c|   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 228 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c|   4 +-
 5 files changed, 130 insertions(+), 163 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c709d41..e667a87 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -213,42 +213,41 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv 
*priv, uint8_t *data)
 
/* SW counters */
for (i = 0; i < NUM_SW_COUNTERS; i++)
-   strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name);
+   strcpy(data + (idx++) * ETH_GSTRING_LEN, 
sw_stats_desc[i].format);
 
/* Q counters */
for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++)
-   strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name);
+   strcpy(data + (idx++) * ETH_GSTRING_LEN, 
q_stats_desc[i].format);
 
/* VPORT counters */
for (i = 0; i < NUM_VPORT_COUNTERS; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
-  vport_stats_desc[i].name);
+  vport_stats_desc[i].format);
 
/* PPORT counters */
for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
-  pport_802_3_stats_desc[i].name);
+  pport_802_3_stats_desc[i].format);
 
for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
-  pport_2863_stats_desc[i].name);
+  pport_2863_stats_desc[i].format);
 
for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
-  pport_2819_stats_desc[i].name);
+  pport_2819_stats_desc[i].format);
 
for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
-   sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s",
-   prio,
-   pport_per_prio_traffic_stats_desc[i].name);
+   sprintf(data + (idx++) * ETH_GSTRING_LEN,
+   pport_per_prio_traffic_stats_desc[i].format, 
prio);
}
 
pfc_combined = mlx5e_query_pfc_combined(priv);
	for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
-   sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s",
-   prio, pport_per_prio_pfc_stats_desc[i].name);
+   sprintf(data + (idx++) * ETH_GSTRING_LEN,
+   pport_per_prio_pfc_stats_desc[i].format, prio);
}
}
 
@@ -258,16 +257,15 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv 
*priv, uint8_t *data)
/* per channel counters */
for (i = 0; i < priv->params.num_channels; i++)
for (j = 0; j < NUM_RQ_STATS; j++)
-   sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i,
-   rq_stats_desc[j].name);
+   sprintf(data + (idx++) * ETH_GSTRING_LEN,
+   rq_stats_desc[j].format, i);
 
for (tc = 0; tc < priv->params.num_tc; tc++)
for (i = 0; i < priv->params.num_channels; i++)
for (j = 0; j < NUM_SQ_STATS; j++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
-   "tx%d_%s",
-   priv->channeltc_to_txq_map[i][tc],
-   sq_stats_desc[j].name);
+   sq_stats_desc[j].format,
+   priv->channeltc_to_txq_map[i][tc]);
 }
 
 static void mlx5e_get_strings(struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 793d7a1..cb6defd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/c

[PATCH net 6/7] net/mlx5e: Fix number of PFC counters reported to ethtool

2016-06-27 Thread Saeed Mahameed
From: Gal Pressman <g...@mellanox.com>

The number of PFC counters used to count only the number of priorities
with PFC enabled, but each priority has more than one counter, hence the
need to multiply it by the number of PFC counters per priority (for
example, with three PFC-enabled priorities and five counters per
priority, 15 counters must be reported rather than three).

Fixes: cf678570d5a1 ('net/mlx5e: Add per priority group to PPort counters')
Signed-off-by: Gal Pressman <g...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index fc7dcc0..c709d41 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -184,7 +184,9 @@ static unsigned long mlx5e_query_pfc_combined(struct 
mlx5e_priv *priv)
 #define MLX5E_NUM_SQ_STATS(priv) \
(NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \
 test_bit(MLX5E_STATE_OPENED, >state))
-#define MLX5E_NUM_PFC_COUNTERS(priv) hweight8(mlx5e_query_pfc_combined(priv))
+#define MLX5E_NUM_PFC_COUNTERS(priv) \
+   (hweight8(mlx5e_query_pfc_combined(priv)) * \
+NUM_PPORT_PER_PRIO_PFC_COUNTERS)
 
 static int mlx5e_get_sset_count(struct net_device *dev, int sset)
 {
-- 
2.8.0



[PATCH net 3/7] net/mlx5e: Change enum to better reflect usage

2016-06-27 Thread Saeed Mahameed
From: Eli Cohen <e...@mellanox.com>

Change MLX5E_STATE_ASYNC_EVENTS_ENABLE to
MLX5E_STATE_ASYNC_EVENTS_ENABLED since it represents a state and not an
operation.

Fixes: acff797cd1874 ('net/mlx5: Extend mlx5_core to support ConnectX-4 
Ethernet functionality')
Signed-off-by: Eli Cohen <e...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e8a6c33..baa991a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -401,7 +401,7 @@ enum mlx5e_traffic_types {
 };
 
 enum {
-   MLX5E_STATE_ASYNC_EVENTS_ENABLE,
+   MLX5E_STATE_ASYNC_EVENTS_ENABLED,
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f5c8d5d..4383f8c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -244,7 +244,7 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, 
void *vpriv,
 {
struct mlx5e_priv *priv = vpriv;
 
-   if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, >state))
+   if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, >state))
return;
 
switch (event) {
@@ -260,12 +260,12 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, 
void *vpriv,
 
 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 {
-   set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, >state);
+   set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, >state);
 }
 
 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 {
-   clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, >state);
+   clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, >state);
synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC));
 }
 
-- 
2.8.0



[PATCH net 1/7] net/mlx5: Update command strings

2016-06-27 Thread Saeed Mahameed
From: Eli Cohen <e...@mellanox.com>

Add command string for MODIFY_FLOW_TABLE which is used by the driver.

Signed-off-by: Eli Cohen <e...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index dcd2df6..0b49862 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -545,6 +545,7 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
+   MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
default: return "unknown command opcode";
}
 }
-- 
2.8.0



[PATCH net 4/7] net/mlx5e: Check for BlueFlame capability before allocating SQ uar

2016-06-27 Thread Saeed Mahameed
From: Gal Pressman <g...@mellanox.com>

Prior to this patch, the mapping was always set to write combining
without checking whether BlueFlame is supported by the device.

Fixes: 0ba422410bbf ('net/mlx5: Fix global UAR mapping')
Signed-off-by: Gal Pressman <g...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 4383f8c..793d7a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -580,7 +580,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
int err;
 
-   err = mlx5_alloc_map_uar(mdev, >uar, true);
+   err = mlx5_alloc_map_uar(mdev, >uar, !!MLX5_CAP_GEN(mdev, bf));
if (err)
return err;
 
-- 
2.8.0



[PATCH net 0/7] Mellanox 100G mlx5 fixes#2 for 4.7-rc

2016-06-27 Thread Saeed Mahameed
Hi Dave,

The following series provides one-liner fixes for the mlx5 driver plus one
medium-sized patch that reorganizes the ethtool counters reporting.

Highlights:
- Added MODIFY_FLOW_TABLE to command strings table
- Add ConnectX-5 PCIe 4.0 to list of supported devices
- Rename ASYNC_EVENTS enum
- Enable BlueFlame only when supported by device
- Avoid adding same vxlan port twice
- Report the correct number of PFC counters
- Reorganize ethtool reported counters and remove duplications

Thanks,
Saeed.

Eli Cohen (2):
  net/mlx5: Update command strings
  net/mlx5e: Change enum to better reflect usage

Gal Pressman (3):
  net/mlx5e: Check for BlueFlame capability before allocating SQ uar
  net/mlx5e: Fix number of PFC counters reported to ethtool
  net/mlx5e: Reorganize ethtool statistics

Majd Dibbiny (1):
  net/mlx5: Add ConnectX-5 PCIe 4.0 to list of supported devices

Matthew Finlay (1):
  net/mlx5e: Prevent adding the same vxlan port

 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  34 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  35 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c|   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 228 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c|   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c|   3 +
 9 files changed, 144 insertions(+), 170 deletions(-)

-- 
2.8.0



[PATCH net 2/7] net/mlx5: Add ConnectX-5 PCIe 4.0 to list of supported devices

2016-06-27 Thread Saeed Mahameed
From: Majd Dibbiny <m...@mellanox.com>

Add the upcoming ConnectX-5 PCIe 4.0 device to the list of
supported devices by the mlx5 driver.

Signed-off-by: Majd Dibbiny <m...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a19b593..c65f4a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1508,8 +1508,9 @@ static const struct pci_device_id mlx5_core_pci_table[] = 
{
{ PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},   /* ConnectX-4 
VF */
{ PCI_VDEVICE(MELLANOX, 0x1015) },  /* ConnectX-4LX 
*/
{ PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},   /* ConnectX-4LX 
VF */
-   { PCI_VDEVICE(MELLANOX, 0x1017) },  /* ConnectX-5 */
+   { PCI_VDEVICE(MELLANOX, 0x1017) },  /* ConnectX-5, 
PCIe 3.0 */
{ PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},   /* ConnectX-5 
VF */
+   { PCI_VDEVICE(MELLANOX, 0x1019) },  /* ConnectX-5, 
PCIe 4.0 */
{ 0, }
 };
 
-- 
2.8.0



Re: mlx5 SRIOV VLAN support

2016-07-05 Thread Saeed Mahameed
On Tue, Jul 5, 2016 at 10:37 AM, domingo montoya
 wrote:
> Hi,
>
> Is VLAN supported in SRIOV mode for mlx5?
>

For VGT mode vlan is supported by default same as bare-metal.
For VST you need to use ip tool to configure VST vlan:  ip link set
eth vf 1 vlan 2 qos 2

note: VGT mode will not work on VF if VST is configured on that VF.

>
> Can anyone please help me to:
>
> Enable VLAN guest tagging in mlx5 PF driver?
>
> Do I need to add any E-switch rules to do the same?
>

No,
Nothing is required for VGT mode, just create the vlan on guest VF
machine and work with it!
E-Switch is automatically/dynamically configured.

>
> My requirement is something like this:
>
>
> mlx5 VF driver<--->mlx5 PF
> driver<->Network<---> mlx5 dedicated
> driver(non-sriov)
>
>
>
> I would like to be able to send ICMP traffic between mlx5 VF driver
> and mlx5 dedicated driver.
>
> My understanding is I should be able to do without adding any rules in
> the E-switch on the PF driver as by default all traffic is allowed.
>
> Is this correct?
>

correct.

>
>
> I tried but as this didn't work. I added ingress rules to allow VLAN
> traffic and also egress rules to the E-switch ACL tables.
>

How did you do this ? changed the code ? by default (no VST/no spoof
check) ACL tables should not be opened.

> But still i cannot see any traffic between mlx5 VF driver and mlx5
> dedicated driver.
>
> I can see that the packets reach the CX4 adapter (mlx5 VF/mlx5 PF) but
> they don't go out on the network.
>
> i tried configuring vlan using ip link add dev enp1s0f0 name
> enp1s0f0.100 type vlan id 100 on both the mlx5 vf driver and mlx5
> dedicated driver.
>
>

Strange, should work. Please make sure VST mode is off on the VF.
can you share some logs with us:
on PF/VF and remote dedicated driver:

ip link show 
ethtool -k 

Thanks,
Saeed.


[PATCH net] bonding: fix enslavement slave link notifications

2016-07-05 Thread Saeed Mahameed
From: Aviv Heller <av...@mellanox.com>

Currently, link notifications are not sent by
bond_set_slave_link_state() upon enslavement if
the slave is enslaved when up.

This happens because slave->link default init value
is 0, which is the same as BOND_LINK_UP, resulting
in bond_set_slave_link_state() ignoring this transition.

This patch sets the default value of slave->link to
BOND_LINK_NOCHANGE, assuring it will count as a state
transition and thus trigger notification logic.
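
A stand-alone illustration of the problem (enum values here are made up,
not bonding's) may help: a zero-initialized link field that happens to
equal the "up" value turns the first transition of an already-up slave
into a silent no-op.

enum link_state { LINK_UP = 0, LINK_DOWN, LINK_NOCHANGE };

struct slave_like {
	enum link_state link;		/* kzalloc()'ed slave => 0 == LINK_UP */
};

/* returns 1 when the caller should send a link notification */
static int set_link_state(struct slave_like *slave, enum link_state new_state)
{
	if (slave->link == new_state)	/* up -> up: silently ignored */
		return 0;
	slave->link = new_state;
	return 1;
}

Initializing link to LINK_NOCHANGE guarantees the first real state is
always seen as a transition.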

Signed-off-by: Aviv Heller <av...@mellanox.com>
Reviewed-by: Jiri Pirko <j...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 941ec99..a2afa3b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1584,6 +1584,7 @@ int bond_enslave(struct net_device *bond_dev, struct 
net_device *slave_dev)
}
 
/* check for initial state */
+   new_slave->link = BOND_LINK_NOCHANGE;
if (bond->params.miimon) {
if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
if (bond->params.updelay) {
-- 
2.8.0



Re: mlx5 SRIOV VLAN support

2016-07-05 Thread Saeed Mahameed
On Tue, Jul 5, 2016 at 12:23 PM, domingo montoya
<reach.domingomont...@gmail.com> wrote:
> Thanks so much for the reply. Please find the logs.
>
>
[...]
> # ip link show enp1s0f0
> 3: enp1s0f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state
> UP mode DEFAULT qlen 1000
> link/ether e4:1d:2d:c9:c8:9a brd ff:ff:ff:ff:ff:ff
>
> # ethtool -k enp1s0f0
> Features for enp1s0f0:
[...]
> rx-vlan-filter: on

I was basically looking for this and it looks ok.

>
>
>
>
> On Tue, Jul 5, 2016 at 1:41 PM, Saeed Mahameed
> <sae...@dev.mellanox.co.il> wrote:
>> On Tue, Jul 5, 2016 at 10:37 AM, domingo montoya
>> <reach.domingomont...@gmail.com> wrote:
>>> Hi,
>>>
>>> Is VLAN supported in SRIOV mode for mlx5?
>>>
>>
>> For VGT mode vlan is supported by default same as bare-metal.
>> For VST you need to use ip tool to configure VST vlan:  ip link set
>> eth vf 1 vlan 2 qos 2
>>
>> note: VGT mode will not work on VF if VST is configured on that VF.
>
> Can you please tell me how can I check if VST mode is enabled on the
> VF from the VF driver.
>

from the VF driver you can't.
you need to run ip link show on the PF interface of the SRIOV Hypervisor.

>
>
>>
>>>
>>> Can anyone please help me to:
>>>
>>> Enable VLAN guest tagging in mlx5 PF driver?
>>>
>>> Do I need to add any E-switch rules to do the same?
>>>
>>
>> No,
>> Nothing is required for VGT mode, just create the vlan on guest VF
>> machine and work with it!
>> E-Switch is automatically/dynamically configured.
>
>
> Is it something like when we create vlan on the guest VF, the VF
> driver sends commands to the adapter fw and the adapter fw sends any
> notification to the PF driver to configure Eswitch?
>

Yes, but for vlan lists we do nothing in the PF driver e-switch
notification, since the e-switch allows all vlans by default.

>
>>
>>>
>>> My requirement is something like this:
>>>
>>>
>>> mlx5 VF driver<--->mlx5 PF
>>> driver<->Network<---> mlx5 dedicated
>>> driver(non-sriov)
>>>
>>>
>>>
>>> I would like to be able to send ICMP traffic between mlx5 VF driver
>>> and mlx5 dedicated driver.
>>>
>>> My understanding is I should be able to do without adding any rules in
>>> the E-switch on the PF driver as by default all traffic is allowed.
>>>
>>> Is this correct?
>>>
>>
>> correct.
>>
>>>
>>>
>>> I tried but as this didn't work. I added ingress rules to allow VLAN
>>> traffic and also egress rules to the E-switch ACL tables.
>>>
>>
>> How did you do this ? changed the code ? by default (no VST/no spoof
>> check) ACL tables should not be opened.
>
>
> Yes. I changed the code. Okay. Now I understand that ACL tables are
> only for VST or spoofcheck. Thanks!
>

did you change the code anywhere else ?

>>
>>> But still i cannot see any traffic between mlx5 VF driver and mlx5
>>> dedicated driver.
>>>
>>> I can see that the packets reach the CX4 adapter (mlx5 VF/mlx5 PF) but
>>> they don't go out on the network.
>>>
>>> i tried configuring vlan using ip link add dev enp1s0f0 name
>>> enp1s0f0.100 type vlan id 100 on both the mlx5 vf driver and mlx5
>>> dedicated driver.
>>>
>>>
>>
>> Strange, should work. Please make sure VST mode is off on the VF.
>> can you share some logs with us:
>> on PF/VF and remote dedicated driver:
>>
>> ip link show 
>> ethtool -k 
>
>
> Sure. Wanted to check how to turn off the VST mode on the VF or make
> sure it is off.  I have pasted the logs with this mail(at the
> beginning).
> I had one more doubt. If we need to disable VST mode on the VF from
> the PF driver, using for e.g
>
> ip link set dev eth0 vf 2 4095,
>
>
> will this command eventually call .ndo_set_vf_vlan and setup the
> Eswitch ACL Ingress, Egress rules?
>

No, to disable it you need to run 'ip link set dev eth0 vf 2 vlan 0 qos 0'.
BTW it should be disabled by default.

To see if it is disabled, just run 'ip link show dev eth0' and check that
no vlan is configured on VF 2.


Re: [PATCH] net/mlx5: Avoid setting unused var when modifying vport node GUID

2016-07-05 Thread Saeed Mahameed
On Tue, Jul 5, 2016 at 12:17 PM, Or Gerlitz <ogerl...@mellanox.com> wrote:
> GCC complains on unused-but-set-variable, clean this up.
>
> Fixes: 23898c763f4a ('net/mlx5: E-Switch, Modify node guid on vf set MAC')
> Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>

Acked-by: Saeed Mahameed <sae...@mellanox.com>


Re: [PATCH v6 05/12] Add sample for adding simple drop program to link

2016-07-09 Thread Saeed Mahameed
On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco  wrote:
> Add a sample program that only drops packets at the BPF_PROG_TYPE_XDP_RX
> hook of a link. With the drop-only program, observed single core rate is
> ~20Mpps.
>
> Other tests were run, for instance without the dropcnt increment or
> without reading from the packet header, the packet rate was mostly
> unchanged.
>
> $ perf record -a samples/bpf/xdp1 $(
> proto 17:   20403027 drops/s
>
> ./pktgen_sample03_burst_single_flow.sh -i $DEV -d $IP -m $MAC -t 4
> Running... ctrl^C to stop
> Device: eth4@0
> Result: OK: 11791017(c11788327+d2689) usec, 59622913 (60byte,0frags)
>   5056638pps 2427Mb/sec (2427186240bps) errors: 0
> Device: eth4@1
> Result: OK: 11791012(c11787906+d3106) usec, 60526944 (60byte,0frags)
>   5133311pps 2463Mb/sec (2463989280bps) errors: 0
> Device: eth4@2
> Result: OK: 11791019(c11788249+d2769) usec, 59868091 (60byte,0frags)
>   5077431pps 2437Mb/sec (2437166880bps) errors: 0
> Device: eth4@3
> Result: OK: 11795039(c11792403+d2636) usec, 59483181 (60byte,0frags)
>   5043067pps 2420Mb/sec (2420672160bps) errors: 0
>
> perf report --no-children:
>  26.05%  ksoftirqd/0  [mlx4_en] [k] mlx4_en_process_rx_cq
>  17.84%  ksoftirqd/0  [mlx4_en] [k] mlx4_en_alloc_frags
>   5.52%  ksoftirqd/0  [mlx4_en] [k] mlx4_en_free_frag

This just proves my point from the previous patch: reusing the rx_desc
buffers we are going to drop would save the ~23% of CPU wasted here on
alloc_frags & free_frags, and this can improve some benchmark results
where the CPU is the bottleneck.


Re: [PATCH v6 04/12] net/mlx4_en: add support for fast rx drop bpf program

2016-07-09 Thread Saeed Mahameed
On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco  wrote:
> Add support for the BPF_PROG_TYPE_XDP hook in mlx4 driver.
>
> In tc/socket bpf programs, helpers linearize skb fragments as needed
> when the program touches the packet data. However, in the pursuit of
> speed, XDP programs will not be allowed to use these slower functions,
> especially if it involves allocating an skb.
>
> Therefore, disallow MTU settings that would produce a multi-fragment
> packet that XDP programs would fail to access. Future enhancements could
> be done to increase the allowable MTU.
>
> Signed-off-by: Brenden Blanco 
> ---
>  drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 38 
> ++
>  drivers/net/ethernet/mellanox/mlx4/en_rx.c | 36 +---
>  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  5 
>  3 files changed, 75 insertions(+), 4 deletions(-)
>
[...]
> +   /* A bpf program gets first chance to drop the packet. It may
> +* read bytes but not past the end of the frag.
> +*/
> +   if (prog) {
> +   struct xdp_buff xdp;
> +   dma_addr_t dma;
> +   u32 act;
> +
> +   dma = be64_to_cpu(rx_desc->data[0].addr);
> +   dma_sync_single_for_cpu(priv->ddev, dma,
> +   priv->frag_info[0].frag_size,
> +   DMA_FROM_DEVICE);

In case of XDP_PASS we will dma_sync again in the normal path, this
can be improved by doing the dma_sync as soon as we can and once and
for all, regardless of the path the packet is going to take
(XDP_DROP/mlx4_en_complete_rx_desc/mlx4_en_rx_skb).

> +
> +   xdp.data = page_address(frags[0].page) +
> +   frags[0].page_offset;
> +   xdp.data_end = xdp.data + length;
> +
> +   act = bpf_prog_run_xdp(prog, &xdp);
> +   switch (act) {
> +   case XDP_PASS:
> +   break;
> +   default:
> +   bpf_warn_invalid_xdp_action(act);
> +   case XDP_DROP:
> +   goto next;

The drop action here (goto next) will release the current rx_desc
buffers and use new ones to refill. I know that the mlx4 rx scheme
releases/allocates new pages only once every ~32 packets, but one
improvement that can really help here, especially for XDP_DROP
benchmarks, is to reuse the current rx_desc buffers when the packet is
going to be dropped.

Considering that the mlx4 rx buffer scheme doesn't allow gaps, maybe this
can be added later as a future improvement for the whole mlx4 rx data
path drop decisions.
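
A hedged, generic sketch of the suggested recycling (helper names are
made up, not mlx4 code): on a drop verdict, re-post the same RX buffer
instead of freeing it and allocating a replacement.

struct rx_buf { void *page; unsigned int offset; };

static void repost_rx_buf(struct rx_buf *buf) { (void)buf; }	/* stub */
static void pass_up_stack(struct rx_buf *buf) { (void)buf; }	/* stub */
static void refill_rx_slot(void) { }				/* stub */

static void handle_rx(struct rx_buf *buf, int dropped)
{
	if (dropped) {
		repost_rx_buf(buf);	/* buffer untouched, reuse as-is */
		return;
	}
	pass_up_stack(buf);		/* normal path consumes the buffer */
	refill_rx_slot();		/* so a fresh buffer must be allocated */
}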


Re: [PATCH v6 04/12] net/mlx4_en: add support for fast rx drop bpf program

2016-07-11 Thread Saeed Mahameed
On Sun, Jul 10, 2016 at 7:05 PM, Brenden Blanco <bbla...@plumgrid.com> wrote:
> On Sun, Jul 10, 2016 at 06:25:40PM +0300, Tariq Toukan wrote:
>>
>> On 09/07/2016 10:58 PM, Saeed Mahameed wrote:
>> >On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco <bbla...@plumgrid.com> wrote:
>> >>+   /* A bpf program gets first chance to drop the packet. It 
>> >>may
>> >>+* read bytes but not past the end of the frag.
>> >>+*/
>> >>+   if (prog) {
>> >>+   struct xdp_buff xdp;
>> >>+   dma_addr_t dma;
>> >>+   u32 act;
>> >>+
>> >>+   dma = be64_to_cpu(rx_desc->data[0].addr);
>> >>+   dma_sync_single_for_cpu(priv->ddev, dma,
>> >>+   
>> >>priv->frag_info[0].frag_size,
>> >>+   DMA_FROM_DEVICE);
>> >In case of XDP_PASS we will dma_sync again in the normal path, this
>> >can be improved by doing the dma_sync as soon as we can and once and
>> >for all, regardless of the path the packet is going to take
>> >(XDP_DROP/mlx4_en_complete_rx_desc/mlx4_en_rx_skb).
>> I agree with Saeed, dma_sync is a heavy operation that is now done
>> twice for all packets with XDP_PASS.
>> We should try our best to avoid performance degradation in the flow
>> of unfiltered packets.
> Makes sense, do folks here see a way to do this cleanly?

yes, we need something like:

+static inline void
+mlx4_en_sync_dma(struct mlx4_en_priv *priv,
+struct mlx4_en_rx_desc *rx_desc,
+int length)
+{
+   dma_addr_t dma;
+   int nr;
+
+   /* Sync dma addresses from HW descriptor */
+   for (nr = 0; nr < priv->num_frags; nr++) {
+   struct mlx4_en_frag_info *frag_info = &priv->frag_info[nr];
+
+   if (length <= frag_info->frag_prefix_size)
+   break;
+
+   dma = be64_to_cpu(rx_desc->data[nr].addr);
+   dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size,
+   DMA_FROM_DEVICE);
+   }
+}


@@ -790,6 +808,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
struct mlx4_en_cq *cq, int bud
goto next;
}

+   length = be32_to_cpu(cqe->byte_cnt);
+   length -= ring->fcs_del;
+
+   mlx4_en_sync_dma(priv, rx_desc, length);
 /* data is available continue processing the packet */

and make sure to remove all explicit dma_sync_single_for_cpu calls.


[PATCH net-next 3/8] net/mlx5: Properly remove all steering objects

2016-07-04 Thread Saeed Mahameed
From: Maor Gottlieb <ma...@mellanox.com>

Instead of explicitly cleaning up the well-known parts of the steering
tree, we use the generic tree structure to traverse it for cleanup.
No functional changes.

Signed-off-by: Maor Gottlieb <ma...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 120 +++---
 1 file changed, 15 insertions(+), 105 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 0f969cb..3e95775 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1663,115 +1663,24 @@ cleanup:
return -ENOMEM;
 }
 
-static void cleanup_single_prio_root_ns(struct mlx5_flow_steering *steering,
-   struct mlx5_flow_root_namespace 
*root_ns)
+static void clean_tree(struct fs_node *node)
 {
-   struct fs_node *prio;
-
-   if (!root_ns)
-   return;
+   if (node) {
+   struct fs_node *iter;
+   struct fs_node *temp;
 
-   if (!list_empty(&root_ns->ns.node.children)) {
-   prio = list_first_entry(&root_ns->ns.node.children,
-   struct fs_node,
-list);
-   if (tree_remove_node(prio))
-   mlx5_core_warn(steering->dev,
-  "Flow steering priority wasn't 
destroyed, refcount > 1\n");
+   list_for_each_entry_safe(iter, temp, &node->children, list)
+   clean_tree(iter);
+   tree_remove_node(node);
}
-   if (tree_remove_node(&root_ns->ns.node))
-   mlx5_core_warn(steering->dev,
-  "Flow steering namespace wasn't destroyed, 
refcount > 1\n");
-   root_ns = NULL;
-}
-
-static void destroy_flow_tables(struct fs_prio *prio)
-{
-   struct mlx5_flow_table *iter;
-   struct mlx5_flow_table *tmp;
-
-   fs_for_each_ft_safe(iter, tmp, prio)
-   mlx5_destroy_flow_table(iter);
 }
 
-static void cleanup_root_ns(struct mlx5_flow_steering *steering)
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
 {
-   struct mlx5_flow_root_namespace *root_ns = steering->root_ns;
-   struct fs_prio *iter_prio;
-
-   if (!MLX5_CAP_GEN(steering->dev, nic_flow_table))
-   return;
-
if (!root_ns)
return;
 
-   /* stage 1 */
-   fs_for_each_prio(iter_prio, &root_ns->ns) {
-   struct fs_node *node;
-   struct mlx5_flow_namespace *iter_ns;
-
-   fs_for_each_ns_or_ft(node, iter_prio) {
-   if (node->type == FS_TYPE_FLOW_TABLE)
-   continue;
-   fs_get_obj(iter_ns, node);
-   while (!list_empty(&iter_ns->node.children)) {
-   struct fs_prio *obj_iter_prio2;
-   struct fs_node *iter_prio2 =
-   list_first_entry(&iter_ns->node.children,
-struct fs_node,
-list);
-
-   fs_get_obj(obj_iter_prio2, iter_prio2);
-   destroy_flow_tables(obj_iter_prio2);
-   if (tree_remove_node(iter_prio2)) {
-   mlx5_core_warn(steering->dev,
-  "Priority %d wasn't 
destroyed, refcount > 1\n",
-  obj_iter_prio2->prio);
-   return;
-   }
-   }
-   }
-   }
-
-   /* stage 2 */
-   fs_for_each_prio(iter_prio, &root_ns->ns) {
-   while (!list_empty(&iter_prio->node.children)) {
-   struct fs_node *iter_ns =
-   list_first_entry(&iter_prio->node.children,
-struct fs_node,
-list);
-   if (tree_remove_node(iter_ns)) {
-   mlx5_core_warn(steering->dev,
-  "Namespace wasn't destroyed, 
refcount > 1\n");
-   return;
-   }
-   }
-   }
-
-   /* stage 3 */
-   while (!list_empty(&root_ns->ns.node.children)) {
-   struct fs_prio *obj_prio_node;
-   struct fs_node *prio_node =
-   list_first_entry(&root_ns->ns.node.children,
-   

[PATCH net-next 2/8] net/mlx5: Introduce mlx5_flow_steering structure

2016-07-04 Thread Saeed Mahameed
From: Maor Gottlieb <ma...@mellanox.com>

Instead of having all the steering private namespaces and
steering module fields flat in mlx5_core_priv, we wrap
them in an mlx5_flow_steering structure for better modularity and
API exposure.
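
For reference, a sketch of the wrapper this patch introduces; the field
list below is inferred from the accesses visible in the diff
(steering->dev, ->root_ns, ->fdb_root_ns, ->esw_egress_root_ns,
->esw_ingress_root_ns) rather than copied from fs_core.h:

struct mlx5_flow_steering {
	struct mlx5_core_dev			*dev;
	struct mlx5_flow_root_namespace		*root_ns;
	struct mlx5_flow_root_namespace		*fdb_root_ns;
	struct mlx5_flow_root_namespace		*esw_egress_root_ns;
	struct mlx5_flow_root_namespace		*esw_ingress_root_ns;
};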

Signed-off-by: Maor Gottlieb <ma...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 134 --
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h |   8 ++
 include/linux/mlx5/driver.h   |   6 +-
 3 files changed, 84 insertions(+), 64 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 7fcdae1..0f969cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1363,12 +1363,13 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum 
mlx5_flow_namespace_type type)
 {
-   struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns;
+   struct mlx5_flow_steering *steering = dev->priv.steering;
+   struct mlx5_flow_root_namespace *root_ns;
int prio;
struct fs_prio *fs_prio;
struct mlx5_flow_namespace *ns;
 
-   if (!root_ns)
+   if (!steering)
return NULL;
 
switch (type) {
@@ -1380,24 +1381,28 @@ struct mlx5_flow_namespace 
*mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
prio = type;
break;
case MLX5_FLOW_NAMESPACE_FDB:
-   if (dev->priv.fdb_root_ns)
-   return &dev->priv.fdb_root_ns->ns;
+   if (steering->fdb_root_ns)
+   return &steering->fdb_root_ns->ns;
else
return NULL;
case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
-   if (dev->priv.esw_egress_root_ns)
-   return &dev->priv.esw_egress_root_ns->ns;
+   if (steering->esw_egress_root_ns)
+   return &steering->esw_egress_root_ns->ns;
else
return NULL;
case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
-   if (dev->priv.esw_ingress_root_ns)
-   return &dev->priv.esw_ingress_root_ns->ns;
+   if (steering->esw_ingress_root_ns)
+   return &steering->esw_ingress_root_ns->ns;
else
return NULL;
default:
return NULL;
}
 
+   root_ns = steering->root_ns;
+   if (!root_ns)
+   return NULL;
+
	fs_prio = find_prio(&root_ns->ns, prio);
if (!fs_prio)
return NULL;
@@ -1483,13 +1488,13 @@ static bool has_required_caps(struct mlx5_core_dev 
*dev, struct node_caps *caps)
return true;
 }
 
-static int init_root_tree_recursive(struct mlx5_core_dev *dev,
+static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
struct init_tree_node *init_node,
struct fs_node *fs_parent_node,
struct init_tree_node *init_parent_node,
int prio)
 {
-   int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
+   int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
  flow_table_properties_nic_receive.
  max_ft_level);
struct mlx5_flow_namespace *fs_ns;
@@ -1500,7 +1505,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev 
*dev,
 
if (init_node->type == FS_TYPE_PRIO) {
if ((init_node->min_ft_level > max_ft_level) ||
-   !has_required_caps(dev, &init_node->caps))
+   !has_required_caps(steering->dev, &init_node->caps))
return 0;
 
fs_get_obj(fs_ns, fs_parent_node);
@@ -1521,7 +1526,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev 
*dev,
}
prio = 0;
for (i = 0; i < init_node->ar_size; i++) {
-   err = init_root_tree_recursive(dev, &init_node->children[i],
+   err = init_root_tree_recursive(steering, 
&init_node->children[i],
   base, init_node, prio);
if (err)
return err;
@@ -1534,7 +1539,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev 
*dev,
return 0;
 }
 
-static int init_root_tree(struct mlx5_core_dev *dev,
+static int init_root_tree(struct mlx5_flow_steering *steering,
  struct init_tree_node *init_node,
  struct fs_node *fs_parent_node)
 {
@@ -1544,7 +1549,7 @@ static int init_root_tr

[PATCH net-next 5/8] net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering

2016-07-04 Thread Saeed Mahameed
From: Maor Gottlieb <ma...@mellanox.com>

Add support for adding flow steering rules with ethtool
for L3/L4 flow types (ip4/tcp4/udp4).
Those rules get a higher priority than L2 flow rules, in order
to prefer the more specific rules.

Masks are not supported for L3/L4 flow types.
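
The priority arithmetic below can be summarized with a small sketch
(constants mirror this patch): a rule matching more tuples lands in a
lower, more specific priority, and every L3/L4 priority precedes the L2
priorities.

#define ETHTOOL_NUM_L3_L4_FTS		7
#define MLX5E_ETHTOOL_L3_L4_PRIO	0
#define MLX5E_ETHTOOL_L2_PRIO	(MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS)

static int l3_l4_prio(int num_tuples)
{
	/* e.g. a rule matching 5 tuples gets priority 2, while L2 rules
	 * start at MLX5E_ETHTOOL_L2_PRIO == 7 and are matched later */
	return MLX5E_ETHTOOL_L3_L4_PRIO + (ETHTOOL_NUM_L3_L4_FTS - num_tuples);
}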

Signed-off-by: Maor Gottlieb <ma...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   2 +
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c| 161 -
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |   2 +-
 3 files changed, 163 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 357320e..9842594 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -549,9 +549,11 @@ struct mlx5e_ethtool_table {
intnum_rules;
 };
 
+#define ETHTOOL_NUM_L3_L4_FTS 7
 #define ETHTOOL_NUM_L2_FTS 4
 
 struct mlx5e_ethtool_steering {
+   struct mlx5e_ethtool_table  l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
struct mlx5e_ethtool_table  l2_ft[ETHTOOL_NUM_L2_FTS];
struct list_headrules;
int tot_num_rules;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index ee28a9f..830106e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -48,7 +48,8 @@ static void put_flow_table(struct mlx5e_ethtool_table *eth_ft)
}
 }
 
-#define MLX5E_ETHTOOL_L2_PRIO 0
+#define MLX5E_ETHTOOL_L3_L4_PRIO 0
+#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + 
ETHTOOL_NUM_L3_L4_FTS)
 #define MLX5E_ETHTOOL_NUM_ENTRIES 64000
 #define MLX5E_ETHTOOL_NUM_GROUPS  10
 static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
@@ -63,6 +64,17 @@ static struct mlx5e_ethtool_table *get_flow_table(struct 
mlx5e_priv *priv,
int prio;
 
switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+   case TCP_V4_FLOW:
+   case UDP_V4_FLOW:
+   max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+   prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+   eth_ft = >fs.ethtool.l3_l4_ft[prio];
+   break;
+   case IP_USER_FLOW:
+   max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+   prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+   eth_ft = >fs.ethtool.l3_l4_ft[prio];
+   break;
case ETHER_FLOW:
max_tuples = ETHTOOL_NUM_L2_FTS;
prio = max_tuples - num_tuples;
@@ -103,6 +115,31 @@ static void mask_spec(u8 *mask, u8 *val, size_t size)
*((u8 *)val) = *((u8 *)mask) & *((u8 *)val);
 }
 
+static void set_ips(void *outer_headers_v, void *outer_headers_c, __be32 
ip4src_m,
+   __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v)
+{
+   if (ip4src_m) {
+   memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+   src_ipv4_src_ipv6.ipv4_layout.ipv4),
+  &ip4src_v, sizeof(ip4src_v));
+   memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+   src_ipv4_src_ipv6.ipv4_layout.ipv4),
+  0xff, sizeof(ip4src_m));
+   }
+   if (ip4dst_m) {
+   memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+   dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+  &ip4dst_v, sizeof(ip4dst_v));
+   memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+   dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+  0xff, sizeof(ip4dst_m));
+   }
+   MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ethertype, ETH_P_IP);
+   MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ethertype, 0xffff);
+}
+
 static int set_flow_attrs(u32 *match_c, u32 *match_v,
  struct ethtool_rx_flow_spec *fs)
 {
@@ -111,10 +148,66 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v,
void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
 outer_headers);
u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+   struct ethtool_tcpip4_spec *l4_mask;
+   struct ethtool_tcpip4_spec *l4_val;
+   struct ethtool_usrip4_spec *l3_mask;
+   struct ethtool_usrip4_spec *l3_val;
struct ethhdr *eth_val;
struct ethhdr *eth_mask;
 
switch (flow_type) {
+   case TCP_V4_FLOW:
+   l4_mask = &fs->m_u.tcp_ip4_spec;
+   l4_val = &fs->h_u.tcp_ip4_spec;
+   set_ips(o

[PATCH net-next 8/8] net/mlx5e: Expose flow control counters to ethtool

2016-07-04 Thread Saeed Mahameed
From: Gal Pressman <g...@mellanox.com>

Just like the per-priority counters, the global flow control counters are
queried from the per-priority counters register.
The global flow control counters are stored in the priority 0 PFC counters.

Signed-off-by: Gal Pressman <g...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 35 --
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 11 +++
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index d652aa9..4a3757e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -139,6 +139,18 @@ static unsigned long mlx5e_query_pfc_combined(struct 
mlx5e_priv *priv)
return err ? 0 : pfc_en_tx | pfc_en_rx;
 }
 
+static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv)
+{
+   struct mlx5_core_dev *mdev = priv->mdev;
+   u32 rx_pause;
+   u32 tx_pause;
+   int err;
+
+   err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
+   return err ? false : rx_pause | tx_pause;
+}
+
 #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter))
 #define MLX5E_NUM_RQ_STATS(priv) \
(NUM_RQ_STATS * priv->params.num_channels * \
@@ -147,8 +159,8 @@ static unsigned long mlx5e_query_pfc_combined(struct 
mlx5e_priv *priv)
(NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \
 test_bit(MLX5E_STATE_OPENED, >state))
 #define MLX5E_NUM_PFC_COUNTERS(priv) \
-   (hweight8(mlx5e_query_pfc_combined(priv)) * \
-NUM_PPORT_PER_PRIO_PFC_COUNTERS)
+   ((mlx5e_query_global_pause_combined(priv) + 
hweight8(mlx5e_query_pfc_combined(priv))) * \
+ NUM_PPORT_PER_PRIO_PFC_COUNTERS)
 
 static int mlx5e_get_sset_count(struct net_device *dev, int sset)
 {
@@ -210,8 +222,18 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv 
*priv, uint8_t *data)
pfc_combined = mlx5e_query_pfc_combined(priv);
	for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+   char pfc_string[ETH_GSTRING_LEN];
+
+   snprintf(pfc_string, sizeof(pfc_string), "prio%d", 
prio);
sprintf(data + (idx++) * ETH_GSTRING_LEN,
-   pport_per_prio_pfc_stats_desc[i].format, prio);
+   pport_per_prio_pfc_stats_desc[i].format, 
pfc_string);
+   }
+   }
+
+   if (mlx5e_query_global_pause_combined(priv)) {
+   for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+   sprintf(data + (idx++) * ETH_GSTRING_LEN,
+   pport_per_prio_pfc_stats_desc[i].format, 
"global");
}
}
 
@@ -306,6 +328,13 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
}
}
 
+   if (mlx5e_query_global_pause_combined(priv)) {
+   for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+   data[idx++] = 
MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+ 
pport_per_prio_pfc_stats_desc, 0);
+   }
+   }
+
if (!test_bit(MLX5E_STATE_OPENED, >state))
return;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index ae29998..7b9d8a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -254,11 +254,12 @@ static const struct counter_desc 
pport_per_prio_traffic_stats_desc[] = {
 };
 
 static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
-   { "rx_prio%d_pause", PPORT_PER_PRIO_OFF(rx_pause) },
-   { "rx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
-   { "tx_prio%d_pause", PPORT_PER_PRIO_OFF(tx_pause) },
-   { "tx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
-   { "rx_prio%d_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) 
},
+   /* %s is "global" or "prio{i}" */
+   { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) },
+   { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
+   { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) },
+   { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
+   { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
 };
 
 struct mlx5e_rq_stats {
-- 
2.8.0



[PATCH net-next 7/8] net/mlx5e: Expose RDMA VPort counters to ethtool

2016-07-04 Thread Saeed Mahameed
From: Gal Pressman <g...@mellanox.com>

Add the needed descriptors to expose RoCE RDMA counters.

Signed-off-by: Gal Pressman <g...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index fcd490c..ae29998 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -151,6 +151,22 @@ static const struct counter_desc vport_stats_desc[] = {
VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) },
{ "tx_vport_broadcast_bytes",
VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) },
+   { "rx_vport_rdma_unicast_packets",
+   VPORT_COUNTER_OFF(received_ib_unicast.packets) },
+   { "rx_vport_rdma_unicast_bytes",
+   VPORT_COUNTER_OFF(received_ib_unicast.octets) },
+   { "tx_vport_rdma_unicast_packets",
+   VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) },
+   { "tx_vport_rdma_unicast_bytes",
+   VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) },
+   { "rx_vport_rdma_multicast_packets",
+   VPORT_COUNTER_OFF(received_ib_multicast.packets) },
+   { "rx_vport_rdma_multicast_bytes",
+   VPORT_COUNTER_OFF(received_ib_multicast.octets) },
+   { "tx_vport_rdma_multicast_packets",
+   VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) },
+   { "tx_vport_rdma_multicast_bytes",
+   VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) },
 };
 
 #define PPORT_802_3_OFF(c) \
-- 
2.8.0



[PATCH net-next 4/8] net/mlx5e: Add ethtool flow steering support

2016-07-04 Thread Saeed Mahameed
From: Maor Gottlieb <ma...@mellanox.com>

Implement the ethtool set_rxnfc callback to support ethtool flow spec
direct steering. This patch adds only the support for the ether flow type
spec. L3/L4 flow spec support will be added in downstream patches.

Signed-off-by: Maor Gottlieb <ma...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  20 ++
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  21 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|   3 +
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c| 393 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  28 +-
 include/linux/mlx5/fs.h|   1 +
 7 files changed, 456 insertions(+), 12 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index a574dea..05cc1ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -8,6 +8,6 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
-   en_tc.o en_arfs.o en_rep.o
+   en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 00643a1..357320e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -544,8 +544,22 @@ enum {
MLX5E_ARFS_FT_LEVEL
 };
 
+struct mlx5e_ethtool_table {
+   struct mlx5_flow_table *ft;
+   intnum_rules;
+};
+
+#define ETHTOOL_NUM_L2_FTS 4
+
+struct mlx5e_ethtool_steering {
+   struct mlx5e_ethtool_table  l2_ft[ETHTOOL_NUM_L2_FTS];
+   struct list_headrules;
+   int tot_num_rules;
+};
+
 struct mlx5e_flow_steering {
struct mlx5_flow_namespace  *ns;
+   struct mlx5e_ethtool_steering   ethtool;
struct mlx5e_tc_table   tc;
struct mlx5e_vlan_table vlan;
struct mlx5e_l2_table   l2;
@@ -701,6 +715,12 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
+  struct ethtool_rx_flow_spec *fs);
+int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv,
+ int location);
+void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
 void mlx5e_set_rx_mode_work(struct work_struct *work);
 
 void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 7e61ffa..edbb665 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1368,6 +1368,26 @@ static u32 mlx5e_get_priv_flags(struct net_device 
*netdev)
return priv->pflags;
 }
 
+static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+   int err = 0;
+   struct mlx5e_priv *priv = netdev_priv(dev);
+
+   switch (cmd->cmd) {
+   case ETHTOOL_SRXCLSRLINS:
+   err = mlx5e_ethtool_flow_replace(priv, >fs);
+   break;
+   case ETHTOOL_SRXCLSRLDEL:
+   err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
+   break;
+   default:
+   err = -EOPNOTSUPP;
+   break;
+   }
+
+   return err;
+}
+
 const struct ethtool_ops mlx5e_ethtool_ops = {
.get_drvinfo   = mlx5e_get_drvinfo,
.get_link  = ethtool_op_get_link,
@@ -1387,6 +1407,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
.get_rxfh  = mlx5e_get_rxfh,
.set_rxfh  = mlx5e_set_rxfh,
.get_rxnfc = mlx5e_get_rxnfc,
+   .set_rxnfc = mlx5e_set_rxnfc,
.get_tunable   = mlx5e_get_tunable,
.set_tunable   = mlx5e_set_tunable,
.get_pauseparam= mlx5e_get_pauseparam,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 2e1e863..1587a9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ether

[PATCH net-next 6/8] net/mlx5e: Add support to get ethtool flow rules

2016-07-04 Thread Saeed Mahameed
From: Maor Gottlieb <ma...@mellanox.com>

Enhance the existing get_rxnfc callback:
1. Get flow rule of specific ID.
2. Get all flow rules.
3. Get number of rules.

Signed-off-by: Maor Gottlieb <ma...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  4 +++
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  9 ++
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c| 34 ++
 3 files changed, 47 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9842594..1365cdc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -717,6 +717,10 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info,
+  int location);
+int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
+   struct ethtool_rxnfc *info, u32 *rule_locs);
 int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
   struct ethtool_rx_flow_spec *fs);
 int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index edbb665..d652aa9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -931,6 +931,15 @@ static int mlx5e_get_rxnfc(struct net_device *netdev,
case ETHTOOL_GRXRINGS:
info->data = priv->params.num_channels;
break;
+   case ETHTOOL_GRXCLSRLCNT:
+   info->rule_cnt = priv->fs.ethtool.tot_num_rules;
+   break;
+   case ETHTOOL_GRXCLSRULE:
+   err = mlx5e_ethtool_get_flow(priv, info, info->fs.location);
+   break;
+   case ETHTOOL_GRXCLSRLALL:
+   err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs);
+   break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 830106e..d17c242 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -537,6 +537,40 @@ out:
return err;
 }
 
+int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info,
+  int location)
+{
+   struct mlx5e_ethtool_rule *eth_rule;
+
+   if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES)
+   return -EINVAL;
+
+   list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) {
+   if (eth_rule->flow_spec.location == location) {
+   info->fs = eth_rule->flow_spec;
+   return 0;
+   }
+   }
+
+   return -ENOENT;
+}
+
+int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc 
*info,
+   u32 *rule_locs)
+{
+   int location = 0;
+   int idx = 0;
+   int err = 0;
+
+   while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
+   err = mlx5e_ethtool_get_flow(priv, info, location);
+   if (!err)
+   rule_locs[idx++] = location;
+   location++;
+   }
+   return err;
+}
+
 void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv)
 {
struct mlx5e_ethtool_rule *iter;
-- 
2.8.0



[PATCH net-next 1/8] net/mlx5: Refactor mlx5_add_flow_rule

2016-07-04 Thread Saeed Mahameed
From: Maor Gottlieb <ma...@mellanox.com>

Reduce the set of arguments passed to mlx5_add_flow_rule
by introducing the mlx5_flow_spec structure.

Signed-off-by: Maor Gottlieb <ma...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c  |  21 ++---
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  |  68 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|  96 
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c|  31 +++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 100 -
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  55 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  22 ++---
 include/linux/mlx5/fs.h|  10 ++-
 8 files changed, 171 insertions(+), 232 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index b48ad85..dad63f0 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1528,21 +1528,18 @@ static struct mlx5_ib_flow_handler 
*create_flow_rule(struct mlx5_ib_dev *dev,
 {
struct mlx5_flow_table  *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
+   struct mlx5_flow_spec *spec;
void *ib_flow = flow_attr + 1;
-   u8 match_criteria_enable = 0;
unsigned int spec_index;
-   u32 *match_c;
-   u32 *match_v;
u32 action;
int err = 0;
 
if (!is_valid_attr(flow_attr))
return ERR_PTR(-EINVAL);
 
-   match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
-   match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   spec = mlx5_vzalloc(sizeof(*spec));
handler = kzalloc(sizeof(*handler), GFP_KERNEL);
-   if (!handler || !match_c || !match_v) {
+   if (!handler || !spec) {
err = -ENOMEM;
goto free;
}
@@ -1550,7 +1547,8 @@ static struct mlx5_ib_flow_handler 
*create_flow_rule(struct mlx5_ib_dev *dev,
INIT_LIST_HEAD(>list);
 
for (spec_index = 0; spec_index < flow_attr->num_of_specs; 
spec_index++) {
-   err = parse_flow_attr(match_c, match_v, ib_flow);
+   err = parse_flow_attr(spec->match_criteria,
+ spec->match_value, ib_flow);
if (err < 0)
goto free;
 
@@ -1558,11 +1556,11 @@ static struct mlx5_ib_flow_handler 
*create_flow_rule(struct mlx5_ib_dev *dev,
}
 
/* Outer header support only */
-   match_criteria_enable = (!outer_header_zero(match_c)) << 0;
+   spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria))
+   << 0;
action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
-   handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
-  match_c, match_v,
+   handler->rule = mlx5_add_flow_rule(ft, spec,
   action,
   MLX5_FS_DEFAULT_FLOW_TAG,
   dst);
@@ -1578,8 +1576,7 @@ static struct mlx5_ib_flow_handler 
*create_flow_rule(struct mlx5_ib_dev *dev,
 free:
if (err)
kfree(handler);
-   kfree(match_c);
-   kfree(match_v);
+   kvfree(spec);
return err ? ERR_PTR(err) : handler;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 10f18d4..a8cb387 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -175,15 +175,12 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 {
	struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
struct mlx5_flow_destination dest;
-   u8 match_criteria_enable = 0;
struct mlx5e_tir *tir = priv->indir_tir;
-   u32 *match_criteria;
-   u32 *match_value;
+   struct mlx5_flow_spec *spec;
int err = 0;
 
-   match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-   match_criteria  = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-   if (!match_value || !match_criteria) {
+   spec = mlx5_vzalloc(sizeof(*spec));
+   if (!spec) {
netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
err = -ENOMEM;
goto out;
@@ -208,8 +205,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
goto out;
}
 
-   arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, 
match_criteria_enable,
- match_cri

[PATCH net-next 0/8] Mellanox 100G mlx5 ethtool ntuple steering

2016-07-04 Thread Saeed Mahameed
Hi Dave,

This series adds Ethernet ethtool ntuple steering 'ethtool -N|U' and exposes
two more counter sets to ethtool statistics: RDMA vport and global flow
control statistics.

We start with three refactoring patches of the flow steering infrastructure:
- mlx5_add_flow_rule will now receive an mlx5 flow spec, to simplify the
  interface and reduce the number of parameters (see the sketch below)
- All low level steering objects are now wrapped in an mlx5_flow_steering
  structure for better encapsulation
- Flow steering objects are now removed properly and generically, rather than
  by traversing a well-known set of steering tree objects
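
Roughly, the consolidated spec and the new mlx5_add_flow_rule() signature look
like the sketch below (field types are approximate and example_add_rule() is
illustrative only; see patch#1 for the exact definitions):

struct mlx5_flow_spec {
	u32 match_criteria_enable;
	u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)];
	u32 match_value[MLX5_ST_SZ_DW(fte_match_param)];
};

/* Callers now allocate one spec instead of two raw match buffers. */
static struct mlx5_flow_rule *
example_add_rule(struct mlx5_flow_table *ft, struct mlx5_flow_destination *dst)
{
	struct mlx5_flow_spec *spec = mlx5_vzalloc(sizeof(*spec));
	struct mlx5_flow_rule *rule;

	if (!spec)
		return ERR_PTR(-ENOMEM);

	/* ... fill spec->match_criteria / spec->match_value here ... */
	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;

	rule = mlx5_add_flow_rule(ft, spec,
				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				  MLX5_FS_DEFAULT_FLOW_TAG, dst);
	kvfree(spec);
	return rule;
}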

Patch#4 adds the infrastructure and the data structures needed for the ethtool
ntuple steering, all implemented in a new file 'en_fs_ethtool.c'.  It adds
support for the set_rxnfc ethtool callback to add/remove/replace a flow spec
of ether type L2 (sketched below).

Patch#5 adds support for L3/L4 flow specs and gives L3/L4 rules a higher
priority when they are interleaved with L2 rules.

Patch#6 adds support for the get_rxnfc ethtool callback.
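
For reference, the set/get callbacks mentioned above hook into struct
ethtool_ops along the following lines. This is a skeleton only; the handler
names are illustrative, and the real implementations live in en_fs_ethtool.c:

#include <linux/ethtool.h>
#include <linux/netdevice.h>

static int sketch_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
{
	switch (cmd->cmd) {
	case ETHTOOL_SRXCLSRLINS:	/* add or replace a flow rule */
		return 0;		/* -> install rule at cmd->fs.location */
	case ETHTOOL_SRXCLSRLDEL:	/* delete a flow rule */
		return 0;		/* -> remove rule at cmd->fs.location */
	default:
		return -EOPNOTSUPP;
	}
}

static int sketch_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
			    u32 *rule_locs)
{
	switch (cmd->cmd) {
	case ETHTOOL_GRXCLSRLCNT:	/* report number of installed rules */
	case ETHTOOL_GRXCLSRULE:	/* report one rule by location */
	case ETHTOOL_GRXCLSRLALL:	/* report all rule locations */
		return 0;		/* -> fill cmd / rule_locs accordingly */
	default:
		return -EOPNOTSUPP;
	}
}

static const struct ethtool_ops sketch_ethtool_ops = {
	.set_rxnfc = sketch_set_rxnfc,
	.get_rxnfc = sketch_get_rxnfc,
};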

Patches #7 and #8 add RDMA vport and global flow control statistics.

Applied on top: 8186f6e382d8 ('net-next: mediatek: fix compile error inside 
mtk_poll_controller()')

Thanks,
Saeed.

Gal Pressman (2):
  net/mlx5e: Expose RDMA VPort counters to ethtool
  net/mlx5e: Expose flow control counters to ethtool

Maor Gottlieb (6):
  net/mlx5: Refactor mlx5_add_flow_rule
  net/mlx5: Introduce mlx5_flow_steering structure
  net/mlx5: Properly remove all steering objects
  net/mlx5e: Add ethtool flow steering support
  net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering
  net/mlx5e: Add support to get ethtool flow rules

 drivers/infiniband/hw/mlx5/main.c  |  21 +-
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  26 +
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  |  68 +--
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  65 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|  99 ++--
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c| 586 +
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |  27 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c|  31 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 100 ++--
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  55 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 272 --
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |   8 +
 include/linux/mlx5/driver.h|   6 +-
 include/linux/mlx5/fs.h|  11 +-
 15 files changed, 972 insertions(+), 405 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c

-- 
2.8.0



[PATCH net] net: poll tx timeout only on active tx queues

2016-06-30 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Change the netdev watchdog to poll only the real, active tx queues
instead of polling all tx queues.
A netdev driver doesn't necessarily start/stop all of its tx queues;
the inactive ones may legitimately stay stopped.

Fixes: fd2ea0a79faa ('net: Use queue aware tests throughout.')
Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 net/sched/sch_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f9e0e9c..a10f0ff 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -253,7 +253,7 @@ static void dev_watchdog(unsigned long arg)
unsigned int i;
unsigned long trans_start;
 
-   for (i = 0; i < dev->num_tx_queues; i++) {
+   for (i = 0; i < dev->real_num_tx_queues; i++) {
struct netdev_queue *txq;
 
txq = netdev_get_tx_queue(dev, i);
-- 
2.8.0



[PATCH net 09/13] net/mlx5e: Handle RQ flush in error cases

2016-06-30 Thread Saeed Mahameed
From: Daniel Jurgens <dani...@mellanox.com>

Add a timeout to avoid an infinite loop waiting for RQs to flush. This
occurs during AER/EEH and will also happen if the device stops posting
completions due to an internal error or reset, or if moving the RQ to the
error state fails. Also clean up posted receive resources when closing
the RQ.

Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet 
functionality')
Signed-off-by: Daniel Jurgens <dani...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  7 
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 16 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 41 +++
 3 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 244aced..b429591 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -191,6 +191,7 @@ struct mlx5e_tstamp {
 enum {
MLX5E_RQ_STATE_POST_WQES_ENABLE,
MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
+   MLX5E_RQ_STATE_FLUSH_TIMEOUT,
 };
 
 struct mlx5e_cq {
@@ -220,6 +221,8 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq,
 typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe 
*wqe,
  u16 ix);
 
+typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix);
+
 struct mlx5e_dma_info {
struct page *page;
dma_addr_t  addr;
@@ -241,6 +244,7 @@ struct mlx5e_rq {
struct mlx5e_cqcq;
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
mlx5e_fp_alloc_wqe alloc_wqe;
+   mlx5e_fp_dealloc_wqe   dealloc_wqe;
 
unsigned long  state;
intix;
@@ -592,12 +596,15 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
 void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
 
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
 int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
 int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 
ix);
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq);
 void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 38c1286..103feab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -332,6 +332,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
}
rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+   rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
@@ -347,6 +348,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
}
rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+   rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
rq->wqe_sz = (priv->params.lro_en) ?
priv->params.lro_wqe_sz :
@@ -552,17 +554,25 @@ err_destroy_rq:
 
 static void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
+   int tout = 0;
+   int err;
+
	clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
	napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
 
-	mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
-	while (!mlx5_wq_ll_is_empty(&rq->wq))
-		msleep(20);
+	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
+	while (!mlx5_wq_ll_is_empty(&rq->wq) && !err &&
+	       tout++ < MLX5_EN_QP_FLUSH_MAX_ITER)
+		msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+
+	if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER)
+		set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);
 
	/* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
	napi_synchronize(&rq->channel->napi);
 
mlx5e_disable_rq(rq);
+   mlx5e_free_rx_descs(rq);
mlx5e_destroy_rq(rq);
 }
 
diff --git a/driv

[PATCH net 10/13] net/mlx5e: Copy all L2 headers into inline segment

2016-06-30 Thread Saeed Mahameed
From: Matthew Finlay <m...@mellanox.com>

ConnectX4-Lx uses an inline wqe mode that currently defaults to
requiring that the entire L2 header be included in the wqe.
This patch fixes mlx5e_get_inline_hdr_size() to account for
all L2 headers (VLAN, QinQ, etc.) using skb_network_offset(skb).

Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
Signed-off-by: Matthew Finlay <m...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 65e3bce..42a5f06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -123,7 +123,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq 
*sq,
 * headers and occur before the data gather.
 * Therefore these headers must be copied into the WQE
 */
-#define MLX5E_MIN_INLINE ETH_HLEN
+#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
 
if (bf) {
u16 ihs = skb_headlen(skb);
@@ -135,7 +135,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq 
*sq,
return skb_headlen(skb);
}
 
-   return MLX5E_MIN_INLINE;
+   return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
 }
 
 static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
-- 
2.8.0



[PATCH net 12/13] net/mlx5e: Validate BW weight values of ETS

2016-06-30 Thread Saeed Mahameed
From: Rana Shahout <ra...@mellanox.com>

Valid weights assigned to an ETS TClass are in the range 1-100.

Fixes: 08fb1dacdd76 ('net/mlx5e: Support DCBNL IEEE ETS')
Signed-off-by: Rana Shahout <ra...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index b429591..943b1bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -145,7 +145,6 @@ struct mlx5e_umr_wqe {
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
-#define MLX5E_MIN_BW_ALLOC 1   /* Min percentage of BW allocation */
 #endif
 
 struct mlx5e_params {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index b2db180..c585349 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 
*tc_tx_bw,
tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
break;
case IEEE_8021QAZ_TSA_ETS:
-   tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC;
+   tc_tx_bw[i] = ets->tc_tx_bw[i];
break;
}
}
@@ -140,8 +140,12 @@ static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
 
/* Validate Bandwidth Sum */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-   if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+   if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+   if (!ets->tc_tx_bw[i])
+   return -EINVAL;
+
bw_sum += ets->tc_tx_bw[i];
+   }
}
 
if (bw_sum != 0 && bw_sum != 100)
-- 
2.8.0



[PATCH net 04/13] net/mlx5: Fix wait_vital for VFs and remove fixed sleep

2016-06-30 Thread Saeed Mahameed
From: Daniel Jurgens <dani...@mellanox.com>

The device ID for VFs is in a different location than for PFs. This results
in the poll always timing out for VFs. There's no good way to read the
VF device ID without using the PF's configuration space, so switch to waiting
for the health counter to start incrementing. Also remove the 1s sleep
at the beginning.

Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core
driver')
Signed-off-by: Daniel Jurgens <dani...@mellanox.com>

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 41 ++
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c65f4a1..6695893 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1422,46 +1422,31 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
mlx5_pci_err_detected(dev->pdev, 0);
 }
 
-/* wait for the device to show vital signs. For now we check
- * that we can read the device ID and that the health buffer
- * shows a non zero value which is different than 0x
+/* wait for the device to show vital signs by waiting
+ * for the health counter to start counting.
  */
-static void wait_vital(struct pci_dev *pdev)
+static int wait_vital(struct pci_dev *pdev)
 {
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_health *health = >priv.health;
const int niter = 100;
+   u32 last_count = 0;
u32 count;
-   u16 did;
int i;
 
-   /* Wait for firmware to be ready after reset */
-   msleep(1000);
-   for (i = 0; i < niter; i++) {
-   if (pci_read_config_word(pdev, 2, )) {
-   dev_warn(>dev, "failed reading config word\n");
-   break;
-   }
-   if (did == pdev->device) {
-   dev_info(>dev, "device ID correctly read after %d 
iterations\n", i);
-   break;
-   }
-   msleep(50);
-   }
-   if (i == niter)
-   dev_warn(>dev, "%s-%d: could not read device ID\n", 
__func__, __LINE__);
-
for (i = 0; i < niter; i++) {
count = ioread32be(health->health_counter);
if (count && count != 0x) {
-   dev_info(>dev, "Counter value 0x%x after %d 
iterations\n", count, i);
-   break;
+   if (last_count && last_count != count) {
+   dev_info(>dev, "Counter value 0x%x after 
%d iterations\n", count, i);
+   return 0;
+   }
+   last_count = count;
}
msleep(50);
}
 
-   if (i == niter)
-   dev_warn(>dev, "%s-%d: could not read device ID\n", 
__func__, __LINE__);
+   return -ETIMEDOUT;
 }
 
 static void mlx5_pci_resume(struct pci_dev *pdev)
@@ -1473,7 +1458,11 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
dev_info(>dev, "%s was called\n", __func__);
 
pci_save_state(pdev);
-   wait_vital(pdev);
+   err = wait_vital(pdev);
+   if (err) {
+   dev_err(>dev, "%s: wait_vital timed out\n", __func__);
+   return;
+   }
 
err = mlx5_load_one(dev, priv);
if (err)
-- 
2.8.0



[PATCH net 08/13] net/mlx5e: Implement ndo_tx_timeout callback

2016-06-30 Thread Saeed Mahameed
From: Daniel Jurgens <dani...@mellanox.com>

Add callback to handle TX timeouts.

Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet 
functionality')
Signed-off-by: Daniel Jurgens <dani...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 46 +++
 2 files changed, 47 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c22d8c8..244aced 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -539,6 +539,7 @@ struct mlx5e_priv {
struct workqueue_struct*wq;
struct work_struct update_carrier_work;
struct work_struct set_rx_mode_work;
+   struct work_struct tx_timeout_work;
struct delayed_workupdate_stats_work;
 
struct mlx5_core_dev  *mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b94c84b..38c1286 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -98,6 +98,26 @@ static void mlx5e_update_carrier_work(struct work_struct 
*work)
mutex_unlock(>state_lock);
 }
 
+static void mlx5e_tx_timeout_work(struct work_struct *work)
+{
+   struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+  tx_timeout_work);
+   int err;
+
+   rtnl_lock();
+   mutex_lock(>state_lock);
+   if (!test_bit(MLX5E_STATE_OPENED, >state))
+   goto unlock;
+   mlx5e_close_locked(priv->netdev);
+   err = mlx5e_open_locked(priv->netdev);
+   if (err)
+   netdev_err(priv->netdev, "mlx5e_open_locked failed recovering 
from a tx_timeout, err(%d).\n",
+  err);
+unlock:
+   mutex_unlock(>state_lock);
+   rtnl_unlock();
+}
+
 static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 {
struct mlx5e_sw_stats *s = >stats.sw;
@@ -2609,6 +2629,29 @@ static netdev_features_t mlx5e_features_check(struct 
sk_buff *skb,
return features;
 }
 
+static void mlx5e_tx_timeout(struct net_device *dev)
+{
+   struct mlx5e_priv *priv = netdev_priv(dev);
+   bool sched_work = false;
+   int i;
+
+   netdev_err(dev, "TX timeout detected\n");
+
+   for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) {
+   struct mlx5e_sq *sq = priv->txq_to_sq_map[i];
+
+   if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
+   continue;
+   sched_work = true;
+   set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state);
+   netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, 
SQ Cons: 0x%x SQ Prod: 0x%x\n",
+  i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
+   }
+
+   if (sched_work && test_bit(MLX5E_STATE_OPENED, >state))
+   schedule_work(>tx_timeout_work);
+}
+
 static const struct net_device_ops mlx5e_netdev_ops_basic = {
.ndo_open= mlx5e_open,
.ndo_stop= mlx5e_close,
@@ -2626,6 +2669,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic 
= {
 #ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer   = mlx5e_rx_flow_steer,
 #endif
+   .ndo_tx_timeout  = mlx5e_tx_timeout,
 };
 
 static const struct net_device_ops mlx5e_netdev_ops_sriov = {
@@ -2655,6 +2699,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov 
= {
.ndo_get_vf_config   = mlx5e_get_vf_config,
.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
.ndo_get_vf_stats= mlx5e_get_vf_stats,
+   .ndo_tx_timeout  = mlx5e_tx_timeout,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2857,6 +2902,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev 
*mdev,
 
INIT_WORK(>update_carrier_work, mlx5e_update_carrier_work);
INIT_WORK(>set_rx_mode_work, mlx5e_set_rx_mode_work);
+   INIT_WORK(>tx_timeout_work, mlx5e_tx_timeout_work);
INIT_DELAYED_WORK(>update_stats_work, mlx5e_update_stats_work);
 }
 
-- 
2.8.0



[PATCH net 00/13] Mellanox 100G mlx5 resiliency and xmit path fixes

2016-06-30 Thread Saeed Mahameed
Hi Dave,

This series provides two sets of fixes to the mlx5 driver:
- Resiliency fixes for reset flow and internal pci errors
- xmit path fixes

Please consider queuing those patches for -stable (4.6).

Reset flow fixes for core driver:
- Add more commands to the list of error simulated commands 
  when pci errors occur
- Avoid calling sleeping function by the health poll thread
- Fix incorrect page count when in internal error
- Fix timeout in wait vital for VFs
- Deadlock fix and Timeout handling in commands interface

Reset flow and resiliency fixes for mlx5e netdev driver:
- Handle RQ flush in error cases
- Implement ndo_tx_timeout callback
- Timeout if SQ doesn't flush during close
- Log link state changes
- Validate BW weight values of ETS

xmit path fixes:
- Fix wrong fallback assumption in select queue callback
- Account for all L2 headers when copying headers into inline segment

Thanks,
Saeed.

Daniel Jurgens (5):
  net/mlx5: Fix incorrect page count when in internal error
  net/mlx5: Fix wait_vital for VFs and remove fixed sleep
  net/mlx5e: Timeout if SQ doesn't flush during close
  net/mlx5e: Implement ndo_tx_timeout callback
  net/mlx5e: Handle RQ flush in error cases

Matthew Finlay (1):
  net/mlx5e: Copy all L2 headers into inline segment

Mohamad Haj Yahia (4):
  net/mlx5: Fix teardown errors that happen in pci error handler
  net/mlx5: Avoid calling sleeping function by the health poll thread
  net/mlx5: Fix potential deadlock in command mode change
  net/mlx5: Add timeout handle to commands with callback

Rana Shahout (2):
  net/mlx5e: Fix select queue callback
  net/mlx5e: Validate BW weight values of ETS

Shaker Daibes (1):
  net/mlx5e: Log link state changes

 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  | 129 -
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c |   8 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  99 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c|  41 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c|  52 -
 drivers/net/ethernet/mellanox/mlx5/core/health.c   |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  41 +++
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c|  63 +++---
 include/linux/mlx5/driver.h|   1 +
 10 files changed, 335 insertions(+), 121 deletions(-)

-- 
2.8.0



[PATCH net 11/13] net/mlx5e: Fix select queue callback

2016-06-30 Thread Saeed Mahameed
From: Rana Shahout <ra...@mellanox.com>

The default fallback function used by mlx5e select queue can return
any TX queue in the range [0..dev->num_real_tx_queues).

The current implementation assumes that the fallback function returns
a number in the range [0..number of channels).  Actually
dev->num_real_tx_queues = (number of channels) * dev->num_tc,
which is larger than the expected range when num_tc is configured and
could lead to crashes.

To fix this, if num_tc is not configured we can safely return the
fallback suggestion; otherwise we reciprocal_scale the fallback
result to normalize it to the desired range (see the sketch below).
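
For reference, reciprocal_scale() from include/linux/kernel.h maps a u32 into
the interval [0, ep_ro) with a multiply and a shift; the only property the fix
relies on is that the result is always smaller than the number of channels.
A minimal sketch of the normalization (normalize_channel_ix() is illustrative,
not a function added by this patch):

/* Same definition as include/linux/kernel.h */
static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
{
	return (u32)(((u64)val * ep_ro) >> 32);
}

/* Whatever the fallback returned, the result is always a valid
 * channel index in [0, num_channels).
 */
static u16 normalize_channel_ix(u16 channel_ix, u16 num_channels)
{
	if (channel_ix >= num_channels)
		channel_ix = reciprocal_scale(channel_ix, num_channels);

	return channel_ix;
}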

Fixes: 08fb1dacdd76 ('net/mlx5e: Support DCBNL IEEE ETS')
Signed-off-by: Rana Shahout <ra...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Reported-by: Doug Ledford <dledf...@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  5 -
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   | 16 ++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 103feab..216fe3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1707,8 +1707,11 @@ static void mlx5e_netdev_set_tcs(struct net_device 
*netdev)
 
netdev_set_num_tc(netdev, ntc);
 
+   /* Map netdev TCs to offset 0
+* We have our own UP to TXQ mapping for QoS
+*/
for (tc = 0; tc < ntc; tc++)
-   netdev_set_tc_queue(netdev, tc, nch, tc * nch);
+   netdev_set_tc_queue(netdev, tc, nch, 0);
 }
 
 int mlx5e_open_locked(struct net_device *netdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 42a5f06..5740b46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -110,8 +110,20 @@ u16 mlx5e_select_queue(struct net_device *dev, struct 
sk_buff *skb,
 {
struct mlx5e_priv *priv = netdev_priv(dev);
int channel_ix = fallback(dev, skb);
-   int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ?
-skb->vlan_tci >> VLAN_PRIO_SHIFT : 0;
+   int up = 0;
+
+   if (!netdev_get_num_tc(dev))
+   return channel_ix;
+
+   if (skb_vlan_tag_present(skb))
+   up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+
+   /* channel_ix can be larger than num_channels since
+* dev->num_real_tx_queues = num_channels * num_tc
+*/
+   if (channel_ix >= priv->params.num_channels)
+   channel_ix = reciprocal_scale(channel_ix,
+ priv->params.num_channels);
 
return priv->channeltc_to_txq_map[channel_ix][up];
 }
-- 
2.8.0



[PATCH net 06/13] net/mlx5: Add timeout handle to commands with callback

2016-06-30 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

The current implementation does not handle timeouts for commands with a
callback request, and this can lead to a deadlock if the command
doesn't get a fw response.
Add a delayed callback timeout work before posting the command to fw.
In case of a real fw command completion we cancel the delayed work.
In case of a fw command timeout the callback timeout handler will be
called and it will simulate a fw completion with a timeout error.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 38 ++-
 include/linux/mlx5/driver.h   |  1 +
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 74067f5..d6e2a1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -606,11 +606,36 @@ static void dump_command(struct mlx5_core_dev *dev,
pr_debug("\n");
 }
 
+static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
+{
+   struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
+
+   return be16_to_cpu(hdr->opcode);
+}
+
+static void cb_timeout_handler(struct work_struct *work)
+{
+   struct delayed_work *dwork = container_of(work, struct delayed_work,
+ work);
+   struct mlx5_cmd_work_ent *ent = container_of(dwork,
+struct mlx5_cmd_work_ent,
+cb_timeout_work);
+   struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
+cmd);
+
+   ent->ret = -ETIMEDOUT;
+   mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command 
resource\n",
+  mlx5_command_str(msg_to_opcode(ent->in)),
+  msg_to_opcode(ent->in));
+   mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+}
+
 static void cmd_work_handler(struct work_struct *work)
 {
struct mlx5_cmd_work_ent *ent = container_of(work, struct 
mlx5_cmd_work_ent, work);
struct mlx5_cmd *cmd = ent->cmd;
struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, 
cmd);
+   unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
struct mlx5_cmd_layout *lay;
struct semaphore *sem;
unsigned long flags;
@@ -651,6 +676,9 @@ static void cmd_work_handler(struct work_struct *work)
dump_command(dev, ent, 1);
ent->ts1 = ktime_get_ns();
 
+   if (ent->callback)
+   schedule_delayed_work(>cb_timeout_work, cb_timeout);
+
/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
@@ -695,13 +723,6 @@ static const char *deliv_status_to_str(u8 status)
}
 }
 
-static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
-{
-   struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
-
-   return be16_to_cpu(hdr->opcode);
-}
-
 static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 {
unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
@@ -765,6 +786,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, 
struct mlx5_cmd_msg *in,
if (!callback)
init_completion(>done);
 
+   INIT_DELAYED_WORK(>cb_timeout_work, cb_timeout_handler);
INIT_WORK(>work, cmd_work_handler);
if (page_queue) {
cmd_work_handler(>work);
@@ -1242,6 +1264,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 
vec)
struct semaphore *sem;
 
ent = cmd->ent_arr[i];
+   if (ent->callback)
+   cancel_delayed_work(>cb_timeout_work);
if (ent->page_queue)
sem = >pages_sem;
else
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 80776d0..fd72ecf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -629,6 +629,7 @@ struct mlx5_cmd_work_ent {
void   *uout;
int uout_size;
mlx5_cmd_cbk_t  callback;
+   struct delayed_work cb_timeout_work;
void   *context;
int idx;
struct completion   done;
-- 
2.8.0



[PATCH net 03/13] net/mlx5: Fix incorrect page count when in internal error

2016-06-30 Thread Saeed Mahameed
From: Daniel Jurgens <dani...@mellanox.com>

Change the page cleanup flow when in internal error to properly decrement
the page counts when reclaiming pages.  This prevents timing out waiting
for extra pages that were actually cleaned up previously.

Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core 
driver')
Signed-off-by: Daniel Jurgens <dani...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c| 63 +++---
 1 file changed, 44 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 905..32dea35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -345,7 +345,6 @@ retry:
   func_id, npages, err);
goto out_4k;
}
-   dev->priv.fw_pages += npages;
 
	err = mlx5_cmd_status_to_err(&out.hdr);
if (err) {
@@ -373,6 +372,33 @@ out_free:
return err;
 }
 
+static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
+struct mlx5_manage_pages_inbox *in, int in_size,
+struct mlx5_manage_pages_outbox *out, int out_size)
+{
+   struct fw_page *fwp;
+   struct rb_node *p;
+   u32 npages;
+   u32 i = 0;
+
+   if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+   return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size,
+ (u32 *)out, out_size);
+
+   npages = be32_to_cpu(in->num_entries);
+
+	p = rb_first(&dev->priv.page_root);
+   while (p && i < npages) {
+   fwp = rb_entry(p, struct fw_page, rb_node);
+   out->pas[i] = cpu_to_be64(fwp->addr);
+   p = rb_next(p);
+   i++;
+   }
+
+   out->num_entries = cpu_to_be32(i);
+   return 0;
+}
+
 static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 int *nclaimed)
 {
@@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 
func_id, int npages,
in.func_id = cpu_to_be16(func_id);
in.num_entries = cpu_to_be32(npages);
mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+	err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen);
if (err) {
-   mlx5_core_err(dev, "failed reclaiming pages\n");
-   goto out_free;
-   }
-   dev->priv.fw_pages -= npages;
-
-   if (out->hdr.status) {
-	err = mlx5_cmd_status_to_err(&out->hdr);
+   mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
goto out_free;
}
 
@@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 
func_id, int npages,
err = -EINVAL;
goto out_free;
}
-   if (nclaimed)
-   *nclaimed = num_claimed;
 
for (i = 0; i < num_claimed; i++) {
addr = be64_to_cpu(out->pas[i]);
free_4k(dev, addr);
}
+
+   if (nclaimed)
+   *nclaimed = num_claimed;
+
dev->priv.fw_pages -= num_claimed;
if (func_id)
dev->priv.vfs_pages -= num_claimed;
@@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
	p = rb_first(&dev->priv.page_root);
if (p) {
fwp = rb_entry(p, struct fw_page, rb_node);
-   if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-   free_4k(dev, fwp->addr);
-   nclaimed = 1;
-   } else {
-   err = reclaim_pages(dev, fwp->func_id,
-   optimal_reclaimed_pages(),
-				    &nclaimed);
-   }
+   err = reclaim_pages(dev, fwp->func_id,
+   optimal_reclaimed_pages(),
+				    &nclaimed);
+
if (err) {
mlx5_core_warn(dev, "failed reclaiming pages 
(%d)\n",
   err);
@@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
}
} while (p);
 
+   WARN(dev->priv.fw_pages,
+"FW pages counter is %d after reclaiming all pages\n",
+dev->priv.fw_pages);
+   WARN(dev->priv.vfs_pages,
+"VFs FW pages counter is %d after reclaiming all pages\n",
+dev->priv.vfs_pages);
+
return 0;
 }
 
-- 
2.8.0



[PATCH net 02/13] net/mlx5: Avoid calling sleeping function by the health poll thread

2016-06-30 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

In the internal error state the health poll thread will eventually call
synchronize_irq() (to safely trigger command completions), which might
sleep, so we would be calling a sleeping function from atomic context,
which is invalid.
Here we move trigger_cmd_completions(dev) to the enter-error-state path,
which is the earliest stage in error state handling.
This way we won't need to wait for the next health poll to trigger command
completions, and it solves the scheduling-while-atomic issue.
mlx5_enter_error_state can be called from two contexts, so protect it with
dev->intf_state_lock.

Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core 
driver')
Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c 
b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 42d16b9..96a5946 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev)
 
 void mlx5_enter_error_state(struct mlx5_core_dev *dev)
 {
+   mutex_lock(>intf_state_mutex);
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
-   return;
+   goto unlock;
 
mlx5_core_err(dev, "start\n");
-   if (pci_channel_offline(dev->pdev) || in_fatal(dev))
+   if (pci_channel_offline(dev->pdev) || in_fatal(dev)) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+   trigger_cmd_completions(dev);
+   }
 
mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
mlx5_core_err(dev, "end\n");
+
+unlock:
+   mutex_unlock(>intf_state_mutex);
 }
 
 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
@@ -245,7 +251,6 @@ static void poll_health(unsigned long data)
u32 count;
 
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-   trigger_cmd_completions(dev);
mod_timer(>timer, get_next_poll_jiffies());
return;
}
-- 
2.8.0



[PATCH net 05/13] net/mlx5: Fix potential deadlock in command mode change

2016-06-30 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Call the command completion handler in case of a timeout when working in
interrupts mode.
Avoid flushing the commands workqueue after acquiring the semaphores, to
prevent a potential deadlock.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 79 +++
 1 file changed, 33 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index fda43bc..74067f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -710,13 +710,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct 
mlx5_cmd_work_ent *ent)
 
if (cmd->mode == CMD_MODE_POLLING) {
wait_for_completion(>done);
-   err = ent->ret;
-   } else {
-   if (!wait_for_completion_timeout(>done, timeout))
-   err = -ETIMEDOUT;
-   else
-   err = 0;
+   } else if (!wait_for_completion_timeout(>done, timeout)) {
+   ent->ret = -ETIMEDOUT;
+   mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
}
+
+   err = ent->ret;
+
if (err == -ETIMEDOUT) {
mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a 
command resource\n",
   mlx5_command_str(msg_to_opcode(ent->in)),
@@ -774,28 +774,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, 
struct mlx5_cmd_msg *in,
goto out_free;
}
 
-   if (!callback) {
-   err = wait_func(dev, ent);
-   if (err == -ETIMEDOUT)
-   goto out;
-
-   ds = ent->ts2 - ent->ts1;
-   op = be16_to_cpu(((struct mlx5_inbox_hdr 
*)in->first.data)->opcode);
-   if (op < ARRAY_SIZE(cmd->stats)) {
-   stats = >stats[op];
-   spin_lock_irq(>lock);
-   stats->sum += ds;
-   ++stats->n;
-   spin_unlock_irq(>lock);
-   }
-   mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
-  "fw exec time for %s is %lld nsec\n",
-  mlx5_command_str(op), ds);
-   *status = ent->status;
-   free_cmd(ent);
-   }
+   if (callback)
+   goto out;
 
-   return err;
+   err = wait_func(dev, ent);
+   if (err == -ETIMEDOUT)
+   goto out_free;
+
+   ds = ent->ts2 - ent->ts1;
+   op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+   if (op < ARRAY_SIZE(cmd->stats)) {
+   stats = >stats[op];
+   spin_lock_irq(>lock);
+   stats->sum += ds;
+   ++stats->n;
+   spin_unlock_irq(>lock);
+   }
+   mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+  "fw exec time for %s is %lld nsec\n",
+  mlx5_command_str(op), ds);
+   *status = ent->status;
 
 out_free:
free_cmd(ent);
@@ -1185,41 +1183,30 @@ err_dbg:
return err;
 }
 
-void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
 {
struct mlx5_cmd *cmd = >cmd;
int i;
 
for (i = 0; i < cmd->max_reg_cmds; i++)
down(>sem);
-
down(>pages_sem);
 
-   flush_workqueue(cmd->wq);
-
-   cmd->mode = CMD_MODE_EVENTS;
+   cmd->mode = mode;
 
up(>pages_sem);
for (i = 0; i < cmd->max_reg_cmds; i++)
up(>sem);
 }
 
-void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
 {
-   struct mlx5_cmd *cmd = >cmd;
-   int i;
-
-   for (i = 0; i < cmd->max_reg_cmds; i++)
-   down(>sem);
-
-   down(>pages_sem);
-
-   flush_workqueue(cmd->wq);
-   cmd->mode = CMD_MODE_POLLING;
+   mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
+}
 
-   up(>pages_sem);
-   for (i = 0; i < cmd->max_reg_cmds; i++)
-   up(>sem);
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+   mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
 }
 
 static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
-- 
2.8.0



Re: [PATCH net] net: poll tx timeout only on active tx queues

2016-06-30 Thread Saeed Mahameed
On Thu, Jun 30, 2016 at 5:28 PM, Eric Dumazet <eric.duma...@gmail.com> wrote:
> On Thu, 2016-06-30 at 16:58 +0300, Saeed Mahameed wrote:
>> - for (i = 0; i < dev->num_tx_queues; i++) {
>> + for (i = 0; i < dev->real_num_tx_queues; i++) {
>>   struct netdev_queue *txq;
>>
>>   txq = netdev_get_tx_queue(dev, i);
>
> Strange, why don't you change all others helpers that are using
> num_tx_queues ?
>

Which other helpers?
This function assumes that all tx queues are started, so if a
non-real txq is stopped for more than the timeout period it will start
emitting call traces and warnings.

> Which driver had a problem with this code ?

None yet.
Currently all device drivers call netif_tx_start_all_queues(dev)
on open to work around this issue, which is strange since only
real_num_tx_queues are active.


[PATCH net 07/13] net/mlx5e: Timeout if SQ doesn't flush during close

2016-06-30 Thread Saeed Mahameed
From: Daniel Jurgens <dani...@mellanox.com>

Avoid an infinite loop by timing out waiting for the SQ to flush. Also
clean up the TX descriptors if that happens.

Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet 
functionality')
Signed-off-by: Daniel Jurgens <dani...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 25 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   | 32 +++
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index baa991a..c22d8c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -305,6 +305,7 @@ struct mlx5e_sq_dma {
 enum {
MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
MLX5E_SQ_STATE_BF_ENABLE,
+   MLX5E_SQ_STATE_TX_TIMEOUT,
 };
 
 struct mlx5e_ico_wqe_info {
@@ -589,6 +590,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum 
mlx5_event event);
 int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
 
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cb6defd..b94c84b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,6 +39,13 @@
 #include "eswitch.h"
 #include "vxlan.h"
 
+enum {
+   MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000,
+   MLX5_EN_QP_FLUSH_MSLEEP_QUANT   = 20,
+   MLX5_EN_QP_FLUSH_MAX_ITER   = MLX5_EN_QP_FLUSH_TIMEOUT_MS /
+ MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
+};
+
 struct mlx5e_rq_param {
u32rqc[MLX5_ST_SZ_DW(rqc)];
struct mlx5_wq_param   wq;
@@ -782,6 +789,9 @@ static inline void netif_tx_disable_queue(struct 
netdev_queue *txq)
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
+   int tout = 0;
+   int err;
+
if (sq->txq) {
clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, >state);
/* prevent netif_tx_wake_queue */
@@ -792,15 +802,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq)
if (mlx5e_sq_has_room_for(sq, 1))
mlx5e_send_nop(sq, true);
 
-   mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
+   err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
+ MLX5_SQC_STATE_ERR);
+   if (err)
+   set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state);
}
 
-   while (sq->cc != sq->pc) /* wait till sq is empty */
-   msleep(20);
+   /* wait till sq is empty, unless a TX timeout occurred on this SQ */
+   while (sq->cc != sq->pc &&
+  !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state)) {
+   msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+   if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
+   set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state);
+   }
 
/* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
napi_synchronize(>channel->napi);
 
+   mlx5e_free_tx_descs(sq);
mlx5e_disable_sq(sq);
mlx5e_destroy_sq(sq);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 5a750b9cd..65e3bce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -341,6 +341,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct 
net_device *dev)
return mlx5e_sq_xmit(sq, skb);
 }
 
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
+{
+   struct mlx5e_tx_wqe_info *wi;
+   struct sk_buff *skb;
+   u16 ci;
+   int i;
+
+   while (sq->cc != sq->pc) {
+   ci = sq->cc & sq->wq.sz_m1;
+   skb = sq->skb[ci];
+   wi = >wqe_info[ci];
+
+   if (!skb) { /* nop */
+   sq->cc++;
+   continue;
+   }
+
+   for (i = 0; i < wi->num_dma; i++) {
+   struct mlx5e_sq_dma *dma =
+   mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+   mlx5e_tx_dma_unmap(sq->pdev, dma);
+   }
+
+   dev_kfree_skb_any(skb);
+   sq->cc += wi->num_wqebbs;
+  

[PATCH net 13/13] net/mlx5e: Log link state changes

2016-06-30 Thread Saeed Mahameed
From: Shaker Daibes <shak...@mellanox.com>

Add Link Up/Down prints to the kernel log when the link state changes.

Signed-off-by: Shaker Daibes <shak...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 216fe3e..7a0dca2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -81,10 +81,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
port_state = mlx5_query_vport_state(mdev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
 
-   if (port_state == VPORT_STATE_UP)
+   if (port_state == VPORT_STATE_UP) {
+   netdev_info(priv->netdev, "Link up\n");
netif_carrier_on(priv->netdev);
-   else
+   } else {
+   netdev_info(priv->netdev, "Link down\n");
netif_carrier_off(priv->netdev);
+   }
 }
 
 static void mlx5e_update_carrier_work(struct work_struct *work)
-- 
2.8.0



[PATCH net 01/13] net/mlx5: Fix teardown errors that happen in pci error handler

2016-06-30 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

In case of internal error state we simulate the command status
through the return value translation function, but we need to simulate
all the teardown fw commands as successful so that we will not get fw
command failure prints.
This also fixes memory leaks that happen because we skip teardown stages
due to failed fw commands.

Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core 
driver')
Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 0b49862..fda43bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -295,6 +295,12 @@ static int mlx5_internal_err_ret_value(struct 
mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
+   case MLX5_CMD_OP_2ERR_QP:
+   case MLX5_CMD_OP_2RST_QP:
+   case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+   case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+   case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+   case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
return MLX5_CMD_STAT_OK;
 
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -321,8 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev 
*dev, u16 op,
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
case MLX5_CMD_OP_SQERR2RTS_QP:
-   case MLX5_CMD_OP_2ERR_QP:
-   case MLX5_CMD_OP_2RST_QP:
case MLX5_CMD_OP_QUERY_QP:
case MLX5_CMD_OP_SQD_RTS_QP:
case MLX5_CMD_OP_INIT2INIT_QP:
@@ -342,7 +346,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev 
*dev, u16 op,
case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
-   case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
case MLX5_CMD_OP_SET_ROCE_ADDRESS:
case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
@@ -390,11 +393,12 @@ static int mlx5_internal_err_ret_value(struct 
mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_RQT:
case MLX5_CMD_OP_MODIFY_RQT:
case MLX5_CMD_OP_QUERY_RQT:
+
case MLX5_CMD_OP_CREATE_FLOW_TABLE:
case MLX5_CMD_OP_QUERY_FLOW_TABLE:
case MLX5_CMD_OP_CREATE_FLOW_GROUP:
case MLX5_CMD_OP_QUERY_FLOW_GROUP:
-   case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
-- 
2.8.0



[PATCH net-next V2 12/16] net/mlx5e: TIRs management refactoring

2016-06-30 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

The current refresh-tirs self-loopback mechanism refreshes all the tirs
belonging to the same mlx5e instance to prevent self loopback by packets
sent over any ring of that instance. This mechanism relies on all the
tirs/tises of an instance being created with the same transport domain
number (tdn).

Change the driver to refresh all the tirs created under the same tdn,
regardless of which mlx5e netdev instance they belong to.

This behaviour is needed for introducing new mlx5e instances which serve
to represent SRIOV VFs. The representors and the PF share the vport used
for E-Switch management, and we want to avoid NIC level HW loopback between
them, e.g. when sending broadcast packets. To achieve that, both the
representors and the PF NIC will share the tdn.

This patch doesn't add any new functionality.
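
A rough sketch of what the shared TIR helpers could look like (hypothetical:
the real code is in en_common.c, and keeping the list on a file-local head
here is only for illustration; the actual tracking presumably lives with the
shared mdev resources):

#include <linux/list.h>
/* assumes the mlx5/mlx5e driver headers are included */

static LIST_HEAD(mdev_tir_list);	/* all TIRs created under the shared tdn */

int mlx5e_create_tir(struct mlx5_core_dev *mdev,
		     struct mlx5e_tir *tir, u32 *in, int inlen)
{
	int err;

	err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn);
	if (err)
		return err;

	/* Track the TIR so a tdn-wide loopback refresh can find it. */
	list_add(&tir->list, &mdev_tir_list);
	return 0;
}

void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir)
{
	mlx5_core_destroy_tir(mdev, tir->tirn);
	list_del(&tir->list);
}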

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Reviewed-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   | 12 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  | 14 +++---
 .../net/ethernet/mellanox/mlx5/core/en_common.c| 48 +++
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 56 +-
 6 files changed, 77 insertions(+), 57 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index da93bf55..ded3f96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -552,9 +552,10 @@ struct mlx5e_flow_steering {
struct mlx5e_arfs_tablesarfs;
 };
 
-struct mlx5e_direct_tir {
+struct mlx5e_tir {
u32  tirn;
u32  rqtn;
+   struct list_head list;
 };
 
 enum {
@@ -576,8 +577,8 @@ struct mlx5e_priv {
struct mlx5e_channel **channel;
u32tisn[MLX5E_MAX_NUM_TC];
u32indir_rqtn;
-   u32indir_tirn[MLX5E_NUM_INDIR_TIRS];
-   struct mlx5e_direct_tirdirect_tir[MLX5E_MAX_NUM_CHANNELS];
+   struct mlx5e_tir   indir_tir[MLX5E_NUM_INDIR_TIRS];
+   struct mlx5e_tir   direct_tir[MLX5E_MAX_NUM_CHANNELS];
u32tx_rates[MLX5E_MAX_NUM_SQS];
 
struct mlx5e_flow_steering fs;
@@ -784,7 +785,12 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const 
struct sk_buff *skb,
 #endif
 
 u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);
+int mlx5e_create_tir(struct mlx5_core_dev *mdev,
+struct mlx5e_tir *tir, u32 *in, int inlen);
+void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
+  struct mlx5e_tir *tir);
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
+int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev);
 
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 3515e78..10f18d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -93,14 +93,14 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type 
type)
 static int arfs_disable(struct mlx5e_priv *priv)
 {
struct mlx5_flow_destination dest;
-   u32 *tirn = priv->indir_tirn;
+   struct mlx5e_tir *tir = priv->indir_tir;
int err = 0;
int tt;
int i;
 
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
for (i = 0; i < ARFS_NUM_TYPES; i++) {
-   dest.tir_num = tirn[i];
+   dest.tir_num = tir[i].tirn;
tt = arfs_get_tt(i);
/* Modify ttc rules destination to bypass the aRFS tables*/
err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
@@ -176,7 +176,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
struct arfs_table *arfs_t = >fs.arfs.arfs_tables[type];
struct mlx5_flow_destination dest;
u8 match_criteria_enable = 0;
-   u32 *tirn = priv->indir_tirn;
+   struct mlx5e_tir *tir = priv->indir_tir;
u32 *match_criteria;
u32 *match_value;
int err = 0;
@@ -192,16 +192,16 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
switch (type) {
case ARFS_IPV4_TCP:
-   dest.tir_num = tirn[MLX5E_TT_IPV4_TCP];
+   dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn;
break;
case ARFS_IPV4_UDP:
-   dest.tir_num = tirn[MLX5E_TT_IPV4_UDP];
+   dest.tir_num = tir[MLX5E_TT_IPV4_U

[PATCH net-next V2 05/16] net/mlx5: Introduce offloads steering namespace

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add a new namespace (MLX5_FLOW_NAMESPACE_OFFLOADS) to be populated
with flow steering rules that have to be executed before the EN NIC
steering rules are matched.

The namespace is located after the bypass namespace and before the
kernel namespace. Therefore, it precedes the HW processing done for
rules set in the kernel NIC namespace.

Under SRIOV, it allows us to match on e-switch missed packets
and forward them to the relevant VF representor TIR.
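
A consumer of the new namespace would typically resolve it with
mlx5_get_flow_namespace() before creating its tables there; a minimal,
hedged sketch (get_offloads_ns() is illustrative, not part of this patch):

static struct mlx5_flow_namespace *
get_offloads_ns(struct mlx5_core_dev *dev)
{
	struct mlx5_flow_namespace *ns;

	/* Returns NULL if the namespace (or flow steering) is unavailable. */
	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
	if (!ns)
		mlx5_core_warn(dev, "offloads flow namespace not found\n");

	return ns;
}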

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Amir Vadai <a...@vadai.me>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 11 ++-
 include/linux/mlx5/fs.h   |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e912a3d..b040110 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -83,6 +83,11 @@
 #define ANCHOR_NUM_LEVELS 1
 #define ANCHOR_NUM_PRIOS 1
 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
+
+#define OFFLOADS_MAX_FT 1
+#define OFFLOADS_NUM_PRIOS 1
+#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
+
 struct node_caps {
size_t  arr_sz;
long*caps;
@@ -98,7 +103,7 @@ static struct init_tree_node {
int num_levels;
 } root_fs = {
.type = FS_TYPE_NAMESPACE,
-   .ar_size = 4,
+   .ar_size = 5,
.children = (struct init_tree_node[]) {
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
 
FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
@@ -107,6 +112,9 @@ static struct init_tree_node {
  
FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
 ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
  BY_PASS_PRIO_NUM_LEVELS))),
+   ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
+ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, 
OFFLOADS_MAX_FT))),
+
ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
 ADD_NS(ADD_MULTIPLE_PRIO(1, 1),
ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
@@ -1369,6 +1377,7 @@ struct mlx5_flow_namespace 
*mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 
switch (type) {
case MLX5_FLOW_NAMESPACE_BYPASS:
+   case MLX5_FLOW_NAMESPACE_OFFLOADS:
case MLX5_FLOW_NAMESPACE_KERNEL:
case MLX5_FLOW_NAMESPACE_LEFTOVERS:
case MLX5_FLOW_NAMESPACE_ANCHOR:
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 4b7a107..6ad1119 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority,
 
 enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_BYPASS,
+   MLX5_FLOW_NAMESPACE_OFFLOADS,
MLX5_FLOW_NAMESPACE_KERNEL,
MLX5_FLOW_NAMESPACE_LEFTOVERS,
MLX5_FLOW_NAMESPACE_ANCHOR,
-- 
2.8.0



[PATCH net-next V2 13/16] net/mlx5e: Mark enabled RQTs instances explicitly

2016-06-30 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

In the current driver implementation two types of receive queue
tables (RQTs) are in use - direct and indirect.

Change the driver to mark each newly created RQT (direct or indirect)
as "enabled". This behaviour is needed for introducing new mlx5e
instances which serve to represent SRIOV VFs.

The VF representors will have only one type of RQTs (direct).

An "enabled" flag is added to each RQT to allow better handling
and code sharing between the representors and the nic netdevices.

This patch doesn't add any new functionality.

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Reviewed-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   | 13 +--
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 45 +-
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ded3f96..1843a4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -552,10 +552,15 @@ struct mlx5e_flow_steering {
struct mlx5e_arfs_tablesarfs;
 };
 
-struct mlx5e_tir {
-   u32  tirn;
+struct mlx5e_rqt {
u32  rqtn;
-   struct list_head list;
+   bool enabled;
+};
+
+struct mlx5e_tir {
+   u32   tirn;
+   struct mlx5e_rqt  rqt;
+   struct list_head  list;
 };
 
 enum {
@@ -576,7 +581,7 @@ struct mlx5e_priv {
 
struct mlx5e_channel **channel;
u32tisn[MLX5E_MAX_NUM_TC];
-   u32indir_rqtn;
+   struct mlx5e_rqt   indir_rqt;
struct mlx5e_tir   indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir   direct_tir[MLX5E_MAX_NUM_CHANNELS];
u32tx_rates[MLX5E_MAX_NUM_SQS];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 877cf68..7c5c477 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -898,7 +898,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 
*indir,
mutex_lock(>state_lock);
 
if (indir) {
-   u32 rqtn = priv->indir_rqtn;
+   u32 rqtn = priv->indir_rqt.rqtn;
 
memcpy(priv->params.indirection_rqt, indir,
   sizeof(priv->params.indirection_rqt));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 808dff4..db890b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1487,7 +1487,8 @@ static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv 
*priv, void *rqtc,
MLX5_SET(rqtc, rqtc, rq_num[0], rqn);
 }
 
-static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn)
+static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz,
+   int ix, struct mlx5e_rqt *rqt)
 {
struct mlx5_core_dev *mdev = priv->mdev;
void *rqtc;
@@ -1510,34 +1511,37 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, 
int sz, int ix, u32 *rqtn)
else
mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix);
 
-   err = mlx5_core_create_rqt(mdev, in, inlen, rqtn);
+   err = mlx5_core_create_rqt(mdev, in, inlen, >rqtn);
+   if (!err)
+   rqt->enabled = true;
 
kvfree(in);
return err;
 }
 
-static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn)
+static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt)
 {
-   mlx5_core_destroy_rqt(priv->mdev, rqtn);
+   rqt->enabled = false;
+   mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
 }
 
 static int mlx5e_create_rqts(struct mlx5e_priv *priv)
 {
int nch = mlx5e_get_max_num_channels(priv->mdev);
-   u32 *rqtn;
+   struct mlx5e_rqt *rqt;
int err;
int ix;
 
/* Indirect RQT */
-   rqtn = >indir_rqtn;
-   err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn);
+   rqt = >indir_rqt;
+   err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt);
if (err)
return err;
 
/* Direct RQTs */
for (ix = 0; ix < nch; ix++) {
-   rqtn = >direct_tir[ix].rqtn;
-   err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn);
+   rqt = >direct_tir[ix].rqt;
+   err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt);
if (err)
goto err_

[PATCH net-next V2 03/16] net/mlx5: E-Switch, Add miss rule for offloads mode

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

In the sriov offloads mode, packets that are not matched by any other
rule should be sent towards the e-switch manager for further processing.

Add such a "miss" rule, which matches ANY packet, as the last rule in the
e-switch FDB, and program the HW to send these packets to vport 0, where
the e-switch manager runs.
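
In other words, a sketch of the idea (matching the hunk below): an all-zero
match specification matches any packet, and the rule's destination is vport 0.

	/* zeroed match_c/match_v => match ANY packet */
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport_num = 0;	/* the e-switch manager vport */
	flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, 0, match_c, match_v,
				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest);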

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  1 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 40 ++
 2 files changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 2360180..8eed33f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -144,6 +144,7 @@ struct mlx5_eswitch_fdb {
struct offloads_fdb {
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
+   struct mlx5_flow_rule  *miss_rule;
} offloads;
};
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index c6b28df..e3d81ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -38,6 +38,39 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 
+static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
+{
+   struct mlx5_flow_destination dest;
+   struct mlx5_flow_rule *flow_rule = NULL;
+   u32 *match_v, *match_c;
+   int err = 0;
+
+   match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   if (!match_v || !match_c) {
+   esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
+   err = -ENOMEM;
+   goto out;
+   }
+
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+   dest.vport_num = 0;
+
+   flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, 0, match_c, match_v,
+  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, 
);
+   if (IS_ERR(flow_rule)) {
+   err = PTR_ERR(flow_rule);
+   esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err 
%d\n", err);
+   goto out;
+   }
+
+   esw->fdb_table.offloads.miss_rule = flow_rule;
+out:
+   kfree(match_v);
+   kfree(match_c);
+   return err;
+}
+
 #define MAX_PF_SQ 256
 
 int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
@@ -110,8 +143,14 @@ int esw_create_offloads_fdb_table(struct mlx5_eswitch 
*esw, int nvports)
}
esw->fdb_table.offloads.miss_grp = g;
 
+   err = esw_add_fdb_miss_rule(esw);
+   if (err)
+   goto miss_rule_err;
+
return 0;
 
+miss_rule_err:
+   mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 send_vport_err:
@@ -128,6 +167,7 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch 
*esw)
return;
 
esw_debug(esw->dev, "Destroy offloads FDB Table\n");
+   mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
-- 
2.8.0



[PATCH net-next V2 16/16] net/mlx5e: Introduce SRIOV VF representors

2016-06-30 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Implement the relevant profile functions to create an mlx5e driver instance
serving as a VF representor. When SRIOV offloads mode is enabled, each VF
will have a representor netdevice instance on the host.

To do that, we also export a set of shared service functions from en_main.c,
such that they can be used by both the NIC and representor netdevs.

The newly created representor netdevice has a basic set of net_device_ops,
which are the same ndo functions as the NIC netdevice, and an ndo of its
own for the phys port name.

The profiling infrastructure allows sharing code between the NIC and the
vport representor even though the representor has only a subset of the
NIC functionality.

The VF reps, and the PF which is used in that mode to represent the uplink,
expose switchdev ops. Currently the only op supported is attr get for the
port parent ID, which here serves to identify net-devices belonging to the
same HW E-Switch. Other than that, no offloading is implemented and hence
switching functionality is achieved only if one sets SW switching rules, e.g.
using tc, bridge or ovs.

Port phys name (ndo_get_phys_port_name) is implemented to allow exporting
the VF vport number to user-space. Along with the switchdev port parent
id (phys_switch_id), this enables a udev-based consistent naming scheme:

SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="<phys_switch_id>", \
	ATTR{phys_port_name}!="", NAME="$PF_NIC$attr{phys_port_name}"

where phys_switch_id is exposed by the PF (and VF reps) and $PF_NIC is
the name of the PF netdevice.
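
For illustration only, such an ndo could look roughly like the sketch below;
the exact format string is not shown in this mail and is an assumption here.

	static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
						char *buf, size_t len)
	{
		struct mlx5e_priv *priv = netdev_priv(dev);
		struct mlx5_eswitch_rep *rep = priv->ppriv;	/* set when the rep netdev is created */
		int ret;

		/* report the VF number this vport represents (format assumed) */
		ret = snprintf(buf, len, "%d", rep->vport - 1);
		if (ret >= len)
			return -EOPNOTSUPP;
		return 0;
	}

With the udev rule above, a PF named e.g. ens2f0 whose VF0 representor reports
phys_port_name "0" would get its rep renamed to ens2f00 (example values).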

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  28 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  53 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 394 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  20 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  96 -
 6 files changed, 574 insertions(+), 19 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 9b14dad..a574dea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -8,6 +8,6 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
-   en_tc.o en_arfs.o
+   en_tc.o en_arfs.o en_rep.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f61255c..5912a02 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -44,6 +44,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "wq.h"
 #include "mlx5_core.h"
 #include "en_stats.h"
@@ -816,4 +817,31 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev 
*mdev);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
 int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev);
 
+struct mlx5_eswitch_rep;
+int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
+struct mlx5_eswitch_rep *rep);
+void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
+   struct mlx5_eswitch_rep *rep);
+int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep);
+void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep);
+int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
+int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
+
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
+void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
+int mlx5e_create_tises(struct mlx5e_priv *priv);
+void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
+int mlx5e_close(struct net_device *netdev);
+int mlx5e_open(struct net_device *netdev);
+void mlx5e_update_stats_work(struct work_struct *work);
+void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile, void *ppriv);
+void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
+struct rtnl_link_stats64 *
+mlx5e_get_stats(struct n

[PATCH net-next V2 15/16] net/mlx5: Add Representors registration API

2016-06-30 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Introduce E-Switch representor registration/unregistration functions.

These functions are called by the mlx5e driver when the PF NIC is
created upon PCI probe, regardless of the E-Switch mode (NONE,
LEGACY or OFFLOADS).

Add a basic E-Switch database that will hold the vport representors
upon creation.

This patch doesn't add any new functionality.
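
A sketch of the resulting flow (based on the hunks below; the PF registers
itself as the representor of vport 0):

	/* at PF netdev enable time */
	struct mlx5_eswitch_rep rep;

	rep.vport = 0;			/* vport 0 == PF / e-switch manager */
	rep.priv_data = priv;		/* mlx5e priv of the PF netdevice */
	mlx5_eswitch_register_vport_rep(esw, &rep);

	/* at PF netdev cleanup time */
	mlx5_eswitch_unregister_vport_rep(esw, 0);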

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 60 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 10 
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 12 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 19 +++
 5 files changed, 97 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8d4d2b2..f61255c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -571,7 +571,7 @@ enum {
 struct mlx5e_profile {
void(*init)(struct mlx5_core_dev *mdev,
struct net_device *netdev,
-   const struct mlx5e_profile *profile);
+   const struct mlx5e_profile *profile, void *ppriv);
void(*cleanup)(struct mlx5e_priv *priv);
int (*init_rx)(struct mlx5e_priv *priv);
void(*cleanup_rx)(struct mlx5e_priv *priv);
@@ -618,6 +618,7 @@ struct mlx5e_priv {
struct mlx5e_tstamptstamp;
u16 q_counter;
const struct mlx5e_profile *profile;
+   void  *ppriv;
 };
 
 enum mlx5e_link_mode {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8ffe68b..bfe3a4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2881,7 +2881,8 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params 
*params, u8 cq_period_mode)
 
 static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
struct net_device *netdev,
-   const struct mlx5e_profile *profile)
+   const struct mlx5e_profile *profile,
+   void *ppriv)
 {
struct mlx5e_priv *priv = netdev_priv(netdev);
u32 link_speed = 0;
@@ -2963,6 +2964,7 @@ static void mlx5e_build_nic_netdev_priv(struct 
mlx5_core_dev *mdev,
priv->netdev   = netdev;
priv->params.num_channels  = profile->max_nch(mdev);
priv->profile  = profile;
+   priv->ppriv= ppriv;
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_ets_init(priv);
@@ -3127,18 +3129,25 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv 
*priv)
 
 static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
   struct net_device *netdev,
-  const struct mlx5e_profile *profile)
+  const struct mlx5e_profile *profile,
+  void *ppriv)
 {
struct mlx5e_priv *priv = netdev_priv(netdev);
 
-   mlx5e_build_nic_netdev_priv(mdev, netdev, profile);
+   mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
mlx5e_build_nic_netdev(netdev);
mlx5e_vxlan_init(priv);
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
+   struct mlx5_core_dev *mdev = priv->mdev;
+   struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
mlx5e_vxlan_cleanup(priv);
+
+   if (MLX5_CAP_GEN(mdev, vport_group_manager))
+   mlx5_eswitch_unregister_vport_rep(esw, 0);
 }
 
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
@@ -3230,6 +3239,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 {
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
+   struct mlx5_eswitch *esw = mdev->priv.eswitch;
+   struct mlx5_eswitch_rep rep;
 
if (mlx5e_vxlan_allowed(mdev)) {
rtnl_lock();
@@ -3239,6 +3250,12 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 
mlx5e_enable_async_events(priv);
queue_work(priv->wq, >set_rx_mode_work);
+
+   if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+   rep.vport = 0;
+   rep.priv_data = priv;
+   mlx5_eswitch_register_vport_rep(esw, );
+   }
 }
 
 static void mlx5e_nic_disable(struct mlx5e_priv *priv)
@@ -3262,7 +3279,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
 };
 
 static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
- 

[PATCH net-next V2 14/16] net/mlx5e: Add support for multiple profiles

2016-06-30 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

To support representor netdevices, where we create more than one
netdevice per NIC, add profiles to the mlx5e driver. The profiling
allows creating mlx5e instances with different characteristics.

Each profile implements its own behavior using a set of function pointers
defined in struct mlx5e_profile. This is done to avoid complex
per-profile branching in the code.

Currently only the profile for the conventional NIC is implemented,
which is of use when a netdev is created upon PCI probe.

This patch doesn't add any new functionality.
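
A sketch of the indirection this adds (taken from the hunks below): common
code dispatches through the profile ops instead of calling NIC-specific
helpers directly.

	/* periodic stats update goes through the profile */
	priv->profile->update_stats(priv);

	/* loops are sized by profile-provided limits */
	for (i = 0; i < priv->profile->max_tc; i++)
		priv->channeltc_to_txq_map[ix][i] =
			ix + i * priv->params.num_channels;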

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  17 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 341 ++
 2 files changed, 240 insertions(+), 118 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1843a4c..8d4d2b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -568,6 +568,22 @@ enum {
MLX5E_NIC_PRIO
 };
 
+struct mlx5e_profile {
+   void(*init)(struct mlx5_core_dev *mdev,
+   struct net_device *netdev,
+   const struct mlx5e_profile *profile);
+   void(*cleanup)(struct mlx5e_priv *priv);
+   int (*init_rx)(struct mlx5e_priv *priv);
+   void(*cleanup_rx)(struct mlx5e_priv *priv);
+   int (*init_tx)(struct mlx5e_priv *priv);
+   void(*cleanup_tx)(struct mlx5e_priv *priv);
+   void(*enable)(struct mlx5e_priv *priv);
+   void(*disable)(struct mlx5e_priv *priv);
+   void(*update_stats)(struct mlx5e_priv *priv);
+   int (*max_nch)(struct mlx5_core_dev *mdev);
+   int max_tc;
+};
+
 struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_sq**txq_to_sq_map;
@@ -601,6 +617,7 @@ struct mlx5e_priv {
struct mlx5e_stats stats;
struct mlx5e_tstamptstamp;
u16 q_counter;
+   const struct mlx5e_profile *profile;
 };
 
 enum mlx5e_link_mode {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index db890b2..8ffe68b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -234,7 +234,7 @@ static void mlx5e_update_stats_work(struct work_struct 
*work)
   update_stats_work);
mutex_lock(>state_lock);
if (test_bit(MLX5E_STATE_OPENED, >state)) {
-   mlx5e_update_stats(priv);
+   priv->profile->update_stats(priv);
queue_delayed_work(priv->wq, dwork,
   
msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL));
}
@@ -1037,7 +1037,7 @@ static void mlx5e_build_channeltc_to_txq_map(struct 
mlx5e_priv *priv, int ix)
 {
int i;
 
-   for (i = 0; i < MLX5E_MAX_NUM_TC; i++)
+   for (i = 0; i < priv->profile->max_tc; i++)
priv->channeltc_to_txq_map[ix][i] =
ix + i * priv->params.num_channels;
 }
@@ -1525,21 +1525,20 @@ static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, 
struct mlx5e_rqt *rqt)
mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
 }
 
-static int mlx5e_create_rqts(struct mlx5e_priv *priv)
+static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv)
+{
+   struct mlx5e_rqt *rqt = >indir_rqt;
+
+   return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt);
+}
+
+static int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
 {
-   int nch = mlx5e_get_max_num_channels(priv->mdev);
struct mlx5e_rqt *rqt;
int err;
int ix;
 
-   /* Indirect RQT */
-   rqt = >indir_rqt;
-   err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt);
-   if (err)
-   return err;
-
-   /* Direct RQTs */
-   for (ix = 0; ix < nch; ix++) {
+   for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
rqt = >direct_tir[ix].rqt;
err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt);
if (err)
@@ -1552,22 +1551,9 @@ err_destroy_rqts:
for (ix--; ix >= 0; ix--)
mlx5e_destroy_rqt(priv, >direct_tir[ix].rqt);
 
-   mlx5e_destroy_rqt(priv, >indir_rqt);
-
return err;
 }
 
-static void mlx5e_destroy_rqts(struct mlx5e_priv *priv)
-{
-   int nch = mlx5e_get_max_num_channels(priv->mdev);
-   int i;
-
-   for (i = 0; i < nch; i++)
-   mlx5e_destroy_rqt(priv, >direct_tir[i].rqt);
-
-   mlx5e_destroy_rqt(priv, >indir_rqt);
-}
-
 int mlx5e_redirect_rqt(struct mlx5e_pri

[PATCH net-next V2 10/16] net/mlx5e: Add devlink based SRIOV mode changes

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Implement handlers for the devlink commands to get and set the SRIOV
E-Switch mode.

When turning to the switchdev/offloads mode, we disable the e-switch
and enable it again in the new mode, create the NIC offloads table
and create VF reps.

When turning to legacy mode, we remove the VF reps and the offloads
table, and re-initiate the e-switch in its legacy mode.

The actual creation/removal of the VF reps is done in downstream patches.
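
The mode change into offloads essentially re-creates the e-switch in the new
mode; a sketch of the flow implemented by esw_offloads_start() in the hunk
below:

	int num_vfs = esw->dev->priv.sriov.num_vfs;

	mlx5_eswitch_disable_sriov(esw);
	err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
	if (err)
		esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err);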

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  12 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 121 -
 2 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 1fc4cfd..12f509c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -81,8 +81,8 @@ enum {
MC_ADDR_CHANGE | \
PROMISC_CHANGE)
 
-int  esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports);
-void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw);
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
 
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
u32 events_mask)
@@ -1561,7 +1561,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, 
int nvfs, int mode)
if (mode == SRIOV_LEGACY)
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
else
-   err = esw_create_offloads_fdb_table(esw, nvfs + 1);
+   err = esw_offloads_init(esw, nvfs + 1);
if (err)
goto abort;
 
@@ -1581,6 +1581,7 @@ abort:
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 {
struct esw_mc_addr *mc_promisc;
+   int nvports;
int i;
 
if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
@@ -1591,6 +1592,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 esw->enabled_vports, esw->mode);
 
mc_promisc = esw->mc_promisc;
+   nvports = esw->enabled_vports;
 
for (i = 0; i < esw->total_vports; i++)
esw_disable_vport(esw, i);
@@ -1600,8 +1602,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 
if (esw->mode == SRIOV_LEGACY)
esw_destroy_legacy_fdb_table(esw);
-   else
-   esw_destroy_offloads_fdb_table(esw);
+   else if (esw->mode == SRIOV_OFFLOADS)
+   esw_offloads_cleanup(esw, nvports);
 
esw->mode = SRIOV_NONE;
/* VPORT 0 (PF) must be enabled back with non-sriov configuration */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e1727a9..312b6f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -112,7 +112,7 @@ out:
 
 #define MAX_PF_SQ 256
 
-int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
+static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 {
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_core_dev *dev = esw->dev;
@@ -200,7 +200,7 @@ ns_err:
return err;
 }
 
-void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
 {
if (!esw->fdb_table.fdb)
return;
@@ -329,12 +329,125 @@ out:
return flow_rule;
 }
 
+static int esw_offloads_start(struct mlx5_eswitch *esw)
+{
+   int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+
+   if (esw->mode != SRIOV_LEGACY) {
+   esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not 
enabled\n");
+   return -EINVAL;
+   }
+
+   mlx5_eswitch_disable_sriov(esw);
+   err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+   if (err)
+   esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", 
err);
+   return err;
+}
+
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
+{
+   int err;
+
+   err = esw_create_offloads_fdb_table(esw, nvports);
+   if (err)
+   return err;
+
+   err = esw_create_offloads_table(esw);
+   if (err)
+   goto create_ft_err;
+
+   err = esw_create_vport_rx_group(esw);
+   if (err)
+   goto create_fg_err;
+
+   return 0;
+
+create_fg_err:
+   esw_destroy_offloads_table(esw);
+
+create_ft_err:
+   esw_destroy_offloads_f

[PATCH net-next V2 08/16] net/devlink: Add E-Switch mode control

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add the commands to set and show the mode of the SRIOV E-Switch; two modes
are supported:

* legacy: operating in the "old" L2 based mode (DMAC --> VF vport)

* switchdev: the E-Switch is referred to as a whitebox switch configured
using standard tools such as tc, bridge, openvswitch etc. To allow
working with these tools, a VF representor netdevice is created for each VF
by the E-Switch manager vendor device driver instance (e.g. the PF).
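
On the driver side this boils down to filling two new devlink_ops callbacks;
a sketch with placeholder ("my_") names:

	static const struct devlink_ops my_devlink_ops = {
		/* report the current mode via a u16 (DEVLINK_ESWITCH_MODE_*) */
		.eswitch_mode_get = my_eswitch_mode_get,
		/* switch the e-switch between LEGACY and SWITCHDEV */
		.eswitch_mode_set = my_eswitch_mode_set,
	};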

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 include/net/devlink.h|  3 ++
 include/uapi/linux/devlink.h |  8 
 net/core/devlink.c   | 87 
 3 files changed, 98 insertions(+)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1d45b61..c99ffe8 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -90,6 +90,9 @@ struct devlink_ops {
   u16 tc_index,
   enum devlink_sb_pool_type pool_type,
   u32 *p_cur, u32 *p_max);
+
+   int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode);
+   int (*eswitch_mode_set)(struct devlink *devlink, u16 mode);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ba0073b..915bfa7 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -57,6 +57,8 @@ enum devlink_command {
DEVLINK_CMD_SB_OCC_SNAPSHOT,
DEVLINK_CMD_SB_OCC_MAX_CLEAR,
 
+   DEVLINK_CMD_ESWITCH_MODE_GET,
+   DEVLINK_CMD_ESWITCH_MODE_SET,
/* add new commands above here */
 
__DEVLINK_CMD_MAX,
@@ -95,6 +97,11 @@ enum devlink_sb_threshold_type {
 
 #define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20
 
+enum devlink_eswitch_mode {
+   DEVLINK_ESWITCH_MODE_LEGACY,
+   DEVLINK_ESWITCH_MODE_SWITCHDEV,
+};
+
 enum devlink_attr {
/* don't change the order or add anything between, this is ABI! */
DEVLINK_ATTR_UNSPEC,
@@ -125,6 +132,7 @@ enum devlink_attr {
DEVLINK_ATTR_SB_TC_INDEX,   /* u16 */
DEVLINK_ATTR_SB_OCC_CUR,/* u32 */
DEVLINK_ATTR_SB_OCC_MAX,/* u32 */
+   DEVLINK_ATTR_ESWITCH_MODE,  /* u16 */
 
/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 933e8d4..b2e592a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1394,6 +1394,78 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct 
sk_buff *skb,
return -EOPNOTSUPP;
 }
 
+static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
+   enum devlink_command cmd, u32 portid,
+   u32 seq, int flags, u16 mode)
+{
+   void *hdr;
+
+   hdr = genlmsg_put(msg, portid, seq, _nl_family, flags, cmd);
+   if (!hdr)
+   return -EMSGSIZE;
+
+   if (devlink_nl_put_handle(msg, devlink))
+   goto nla_put_failure;
+
+   if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode))
+   goto nla_put_failure;
+
+   genlmsg_end(msg, hdr);
+   return 0;
+
+nla_put_failure:
+   genlmsg_cancel(msg, hdr);
+   return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
+   struct genl_info *info)
+{
+   struct devlink *devlink = info->user_ptr[0];
+   const struct devlink_ops *ops = devlink->ops;
+   struct sk_buff *msg;
+   u16 mode;
+   int err;
+
+   if (!ops || !ops->eswitch_mode_get)
+   return -EOPNOTSUPP;
+
+   err = ops->eswitch_mode_get(devlink, );
+   if (err)
+   return err;
+
+   msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+   if (!msg)
+   return -ENOMEM;
+
+   err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET,
+  info->snd_portid, info->snd_seq, 0, mode);
+
+   if (err) {
+   nlmsg_free(msg);
+   return err;
+   }
+
+   return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb,
+   struct genl_info *info)
+{
+   struct devlink *devlink = info->user_ptr[0];
+   const struct devlink_ops *ops = devlink->ops;
+   u16 mode;
+
+   if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE])
+   return -EINVAL;
+
+   mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+
+   if (ops && ops->eswitch_mode_set)
+   return ops->eswitch_mode_set(devlink, mode);
+   return -EOPNOTSUPP;

[PATCH net-next V2 04/16] net/mlx5: E-Switch, Add API to create send-to-vport rules

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add the API to create send-to-vport e-switch rules of the form

 packet meta-data :: send-queue-number == $SQN and source-vport == 0 --> $VPORT

These rules are to be used for a send-to-vport logic which conceptually bypasses
the "normal" steering rules currently present in the e-switch datapath.

Such a rule should apply only to packets that originate in the e-switch manager
vport (0) and are sent on a given SQN which is used by a given VF representor
device, hence the matching logic.
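
A hypothetical caller (names illustrative) would pair a representor's send
queue number with the VF vport it represents:

	struct mlx5_flow_rule *rule;

	rule = mlx5_eswitch_add_send_to_vport_rule(esw, vport, sqn);
	if (IS_ERR(rule))
		return PTR_ERR(rule);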

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  3 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 39 ++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 8eed33f..b7fabd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -193,6 +193,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 int vport,
 struct ifla_vf_stats *vf_stats);
+struct mlx5_flow_rule *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 
sqn);
 
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
@@ -204,5 +206,4 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 
 #define esw_debug(dev, format, ...)\
mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
-
 #endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e3d81ae..8964f71 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -38,6 +38,45 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 
+struct mlx5_flow_rule *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 
sqn)
+{
+   struct mlx5_flow_destination dest;
+   struct mlx5_flow_rule *flow_rule;
+   int match_header = MLX5_MATCH_MISC_PARAMETERS;
+   u32 *match_v, *match_c;
+   void *misc;
+
+   match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   if (!match_v || !match_c) {
+   esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
+   flow_rule = ERR_PTR(-ENOMEM);
+   goto out;
+   }
+
+   misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters);
+   MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
+   MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport 
is 0 */
+
+   misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
+   MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
+   MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+   dest.vport_num = vport;
+
+   flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, match_header, 
match_c,
+  match_v, 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+  0, );
+   if (IS_ERR(flow_rule))
+   esw_warn(esw->dev, "FDB: Failed to add send to vport rule err 
%ld\n", PTR_ERR(flow_rule));
+out:
+   kfree(match_v);
+   kfree(match_c);
+   return flow_rule;
+}
+
 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 {
struct mlx5_flow_destination dest;
-- 
2.8.0



[PATCH net-next V2 07/16] net/mlx5: E-Switch, Add API to create vport rx rules

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add the API to create vport rx rules of the form

packet meta-data :: vport == $VPORT --> $TIR

where the TIR is opened by this VF representor.

This logic will be used for packets that didn't match any rule in the
e-switch datapath and should be received into the host OS through the
netdevice that represents the VF they were sent from.
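
A hypothetical caller (names illustrative) would steer the missed traffic of a
given VF to the TIR opened by its representor:

	struct mlx5_flow_rule *rule;

	rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, tirn);
	if (IS_ERR(rule))
		return PTR_ERR(rule);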

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  4 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 85 ++
 2 files changed, 89 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 32db37a..cf959f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -157,6 +157,7 @@ enum {
 
 struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
+   struct mlx5_flow_group *vport_rx_group;
 };
 
 struct mlx5_eswitch {
@@ -201,6 +202,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 struct mlx5_flow_rule *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 
sqn);
 
+struct mlx5_flow_rule *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 
tirn);
+
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
 #define esw_info(dev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e895c6f..7aad367 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -243,3 +243,88 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch 
*esw)
 
mlx5_destroy_flow_table(offloads->ft_offloads);
 }
+
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+{
+   int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+   struct mlx5_flow_group *g;
+   struct mlx5_priv *priv = >dev->priv;
+   u32 *flow_group_in;
+   void *match_criteria, *misc;
+   int err = 0;
+   int nvports = priv->sriov.num_vfs + 2;
+
+   flow_group_in = mlx5_vzalloc(inlen);
+   if (!flow_group_in)
+   return -ENOMEM;
+
+   /* create vport rx group */
+   memset(flow_group_in, 0, inlen);
+   MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+MLX5_MATCH_MISC_PARAMETERS);
+
+   match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, 
match_criteria);
+   misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
+   MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+   MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+   MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 
1);
+
+   g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+   if (IS_ERR(g)) {
+   err = PTR_ERR(g);
+   mlx5_core_warn(esw->dev, "Failed to create vport rx group err 
%d\n", err);
+   goto out;
+   }
+
+   esw->offloads.vport_rx_group = g;
+out:
+   kfree(flow_group_in);
+   return err;
+}
+
+static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
+{
+   mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
+}
+
+struct mlx5_flow_rule *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 
tirn)
+{
+   struct mlx5_flow_destination dest;
+   struct mlx5_flow_rule *flow_rule;
+   int match_header = MLX5_MATCH_MISC_PARAMETERS;
+   u32 *match_v, *match_c;
+   void *misc;
+
+   match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   if (!match_v || !match_c) {
+   esw_warn(esw->dev, "Failed to alloc match parameters\n");
+   flow_rule = ERR_PTR(-ENOMEM);
+   goto out;
+   }
+
+   misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters);
+   MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+   misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
+   MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+   dest.tir_num = tirn;
+
+   flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, match_header, 
match_c,
+  match_v, 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+  0, );
+   if (IS_ERR(flow_rule)) {
+   esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule 
err %ld\n", PTR_ERR(flow_rule));
+

[PATCH net-next V2 11/16] net/mlx5e: Create NIC global resources only once

2016-06-30 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

To allow creating more than one netdev over the same PCI function, we
change the driver such that global NIC resources are created once and
later be shared amongst all the mlx5e netdevs running over that port.

Move the CQ UAR, PD (pdn), Transport Domain (tdn), MKey resources from
being kept in the mlx5e priv part to a new resources structure
(mlx5e_resources) placed under the mlx5_core device.

This patch doesn't add any new functionality.

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Reviewed-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   6 +-
 .../net/ethernet/mellanox/mlx5/core/en_common.c| 112 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 124 +++--
 include/linux/mlx5/driver.h|  13 +++
 5 files changed, 171 insertions(+), 90 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 96f1826..9b14dad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,8 +6,8 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
fs_counters.o rl.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
-   en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
-   en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \
-   en_arfs.o
+   en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
+   en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
+   en_tc.o en_arfs.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index da885c0..da93bf55 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -570,10 +570,6 @@ struct mlx5e_priv {
 
unsigned long  state;
struct mutex   state_lock; /* Protects Interface state */
-   struct mlx5_uarcq_uar;
-   u32pdn;
-   u32tdn;
-   struct mlx5_core_mkey  mkey;
struct mlx5_core_mkey  umr_mkey;
struct mlx5e_rqdrop_rq;
 
@@ -788,5 +784,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const 
struct sk_buff *skb,
 #endif
 
 u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
 
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
new file mode 100644
index 000..33b3732
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+
+/* mlx5e global resources should be placed in this file.
+ * Global resources are common to all the netdevices crated on the same nic.
+ */
+
+static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+struct mlx5_core_mkey *mkey)
+{
+   struct m

Re: [PATCH net 08/13] net/mlx5e: Implement ndo_tx_timeout callback

2016-06-30 Thread Saeed Mahameed
On Thu, Jun 30, 2016 at 6:15 PM, Yuval Mintz  wrote:
>> Add callback to handle TX timeouts.
>>
>> Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 
>> Ethernet functionality')
>
> Not that I mind, but does implementing ndo_tx_timeout actually counts as a 
> fix?

Why not if you want it to get backported to -stable and distros as a
resiliency fix.

Maybe Dave can give some insight..


[PATCH net-next V2 06/16] net/mlx5: E-Switch, Add offloads table

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

The offloads table belongs to the NIC offloads name-space and is used as part
of the SRIOV offloads logic to steer packets that hit the e-switch miss rule
to the TIR of the relevant VF representor.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  5 
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 31 ++
 2 files changed, 36 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b7fabd1..32db37a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -155,6 +155,10 @@ enum {
SRIOV_OFFLOADS
 };
 
+struct mlx5_esw_offload {
+   struct mlx5_flow_table *ft_offloads;
+};
+
 struct mlx5_eswitch {
struct mlx5_core_dev*dev;
struct mlx5_l2_tablel2_table;
@@ -169,6 +173,7 @@ struct mlx5_eswitch {
 */
struct mutexstate_lock;
struct esw_mc_addr  *mc_promisc;
+   struct mlx5_esw_offload offloads;
int mode;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 8964f71..e895c6f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -212,3 +212,34 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch 
*esw)
 
mlx5_destroy_flow_table(esw->fdb_table.fdb);
 }
+
+static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+{
+   struct mlx5_flow_namespace *ns;
+   struct mlx5_flow_table *ft_offloads;
+   struct mlx5_core_dev *dev = esw->dev;
+   int err = 0;
+
+   ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+   if (!ns) {
+   esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
+   return -ENOMEM;
+   }
+
+   ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 
2, 0);
+   if (IS_ERR(ft_offloads)) {
+   err = PTR_ERR(ft_offloads);
+   esw_warn(esw->dev, "Failed to create offloads table, err %d\n", 
err);
+   return err;
+   }
+
+   esw->offloads.ft_offloads = ft_offloads;
+   return 0;
+}
+
+static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
+{
+   struct mlx5_esw_offload *offloads = >offloads;
+
+   mlx5_destroy_flow_table(offloads->ft_offloads);
+}
-- 
2.8.0



[PATCH net-next V2 09/16] net/mlx5: Add devlink interface

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

The devlink interface is initially used to set/get the mode of the SRIOV 
e-switch.

Currently, these are only stubs for get/set; a downstream patch will actually
fill them out.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig|  1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  4 
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 +
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 26 ++
 4 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig 
b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 1cf722e..aae4688 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -4,6 +4,7 @@
 
 config MLX5_CORE
tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver"
+   depends on MAY_USE_DEVLINK
depends on PCI
default n
---help---
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index cf959f7..7843f98 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -35,6 +35,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 #define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -205,6 +206,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch 
*esw, int vport, u32 sqn
 struct mlx5_flow_rule *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 
tirn);
 
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
 #define esw_info(dev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 7aad367..e1727a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -328,3 +328,13 @@ out:
kfree(match_c);
return flow_rule;
 }
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+   return -EOPNOTSUPP;
+}
+
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+   return -EOPNOTSUPP;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 08cae34..2abd387 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -51,6 +51,7 @@
 #ifdef CONFIG_RFS_ACCEL
 #include 
 #endif
+#include 
 #include "mlx5_core.h"
 #include "fs_core.h"
 #ifdef CONFIG_MLX5_CORE_EN
@@ -1315,19 +1316,28 @@ struct mlx5_core_event_handler {
  void *data);
 };
 
+static const struct devlink_ops mlx5_devlink_ops = {
+#ifdef CONFIG_MLX5_CORE_EN
+   .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
+   .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+#endif
+};
 
 static int init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
 {
struct mlx5_core_dev *dev;
+   struct devlink *devlink;
struct mlx5_priv *priv;
int err;
 
-   dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-   if (!dev) {
+   devlink = devlink_alloc(_devlink_ops, sizeof(*dev));
+   if (!devlink) {
dev_err(>dev, "kzalloc failed\n");
return -ENOMEM;
}
+
+   dev = devlink_priv(devlink);
priv = >priv;
priv->pci_dev_data = id->driver_data;
 
@@ -1364,15 +1374,21 @@ static int init_one(struct pci_dev *pdev,
goto clean_health;
}
 
+   err = devlink_register(devlink, >dev);
+   if (err)
+   goto clean_load;
+
return 0;
 
+clean_load:
+   mlx5_unload_one(dev, priv);
 clean_health:
mlx5_health_cleanup(dev);
 close_pci:
mlx5_pci_close(dev, priv);
 clean_dev:
pci_set_drvdata(pdev, NULL);
-   kfree(dev);
+   devlink_free(devlink);
 
return err;
 }
@@ -1380,8 +1396,10 @@ clean_dev:
 static void remove_one(struct pci_dev *pdev)
 {
struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+   struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_priv *priv = >priv;
 
+   devlink_unregister(devlink);
if (mlx5_unload_one(dev, priv)) {
dev_err(>pdev->dev, "mlx5_unload_one failed\n");
mlx5_health_cleanup(dev);
@@ -1390,7 +1408,7 @@ static void remove_one(struct pci_dev *pdev)
mlx5_health_cleanup(dev);
mlx5_pci_close(dev, priv);
pci_set_drvdata(p

[PATCH net-next V2 02/16] net/mlx5: E-Switch, Add support for the sriov offloads mode

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Unlike the legacy mode, here forwarding rules are not learned by the
driver from events on MACs set by VFs/VMs on their vports, but rather
should be programmed by higher-level SW entities.

That said, in the offloads mode (SRIOV_OFFLOADS), two flow
groups are still created by the driver for management (slow path) purposes:

The first group will be used for sending packets over e-switch vports
from the host OS where the e-switch management code runs, to be
received by VFs.

The second group will be used by a miss rule which forwards packets toward
the e-switch manager. Further logic will trap these packets such that
the receiving net-device as seen by the networking stack is the representor
of the vport that sent the packet over the e-switch data-path.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  35 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  16 +++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 135 +
 4 files changed, 168 insertions(+), 20 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index c4f450f..96f1826 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -5,7 +5,7 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o
 
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \
en_arfs.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 8068dde..1fc4cfd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -40,17 +40,6 @@
 
 #define UPLINK_VPORT 0x
 
-#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
-
-#define esw_info(dev, format, ...) \
-   pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
-
-#define esw_warn(dev, format, ...) \
-   pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
-
-#define esw_debug(dev, format, ...)\
-   mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
-
 enum {
MLX5_ACTION_NONE = 0,
MLX5_ACTION_ADD  = 1,
@@ -92,6 +81,9 @@ enum {
MC_ADDR_CHANGE | \
PROMISC_CHANGE)
 
+int  esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports);
+void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw);
+
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
u32 events_mask)
 {
@@ -578,7 +570,8 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct 
vport_addr *vaddr)
if (err)
goto abort;
 
-   if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */
+   /* SRIOV is enabled: Forward UC MAC to vport */
+   if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY)
vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
 
esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n",
@@ -1543,7 +1536,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, 
int vport_num)
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
int err;
-   int i;
+   int i, enabled_events;
 
if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
@@ -1562,18 +1555,19 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, 
int nvfs, int mode)
esw_warn(esw->dev, "E-Switch engress ACL is not supported by 
FW\n");
 
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, 
mode);
-   if (mode != SRIOV_LEGACY)
-   return -EINVAL;
-
esw->mode = mode;
esw_disable_vport(esw, 0);
 
-   err = esw_create_legacy_fdb_table(esw, nvfs + 1);
+   if (mode == SRIOV_LEGACY)
+   err = esw_create_legacy_fdb_table(esw, nvfs + 1);
+   else
+   err = esw_create_offloads_fdb_table(esw, nvfs + 1);
if (err)
goto abort;
 
+   

[PATCH net-next V2 01/16] net/mlx5: E-Switch, Add operational mode to the SRIOV e-Switch

2016-06-30 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Define three modes for SRIOV e-switch operation: none (SRIOV_NONE,
none of the VF vports are enabled), legacy (SRIOV_LEGACY, the current mode)
and sriov offloads (SRIOV_OFFLOADS). Currently, when in SRIOV, only the
legacy mode is supported, where steering rules are of the form:

destination mac --> VF vport

This patch does not change any functionality.
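
The resulting mode set, as a sketch consistent with the eswitch.h hunks in
this series:

	enum {
		SRIOV_NONE,	/* none of the VF vports are enabled */
		SRIOV_LEGACY,	/* current mode: DMAC --> VF vport steering */
		SRIOV_OFFLOADS	/* rules programmed by higher-level SW */
	};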

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 51 +--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 19 +++--
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c   |  5 ++-
 3 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index aebbd6c..8068dde 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -428,7 +428,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, 
u32 vport)
return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
 }
 
-static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
+static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports)
 {
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_core_dev *dev = esw->dev;
@@ -479,7 +479,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, 
int nvports)
esw_warn(dev, "Failed to create flow group err(%d)\n", err);
goto out;
}
-   esw->fdb_table.addr_grp = g;
+   esw->fdb_table.legacy.addr_grp = g;
 
/* Allmulti group : One rule that forwards any mcast traffic */
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -494,7 +494,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, 
int nvports)
esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", 
err);
goto out;
}
-   esw->fdb_table.allmulti_grp = g;
+   esw->fdb_table.legacy.allmulti_grp = g;
 
/* Promiscuous group :
 * One rule that forward all unmatched traffic from previous groups
@@ -511,17 +511,17 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, 
int nvports)
esw_warn(dev, "Failed to create promisc flow group err(%d)\n", 
err);
goto out;
}
-   esw->fdb_table.promisc_grp = g;
+   esw->fdb_table.legacy.promisc_grp = g;
 
 out:
if (err) {
-   if (!IS_ERR_OR_NULL(esw->fdb_table.allmulti_grp)) {
-   mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp);
-   esw->fdb_table.allmulti_grp = NULL;
+   if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) {
+   
mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+   esw->fdb_table.legacy.allmulti_grp = NULL;
}
-   if (!IS_ERR_OR_NULL(esw->fdb_table.addr_grp)) {
-   mlx5_destroy_flow_group(esw->fdb_table.addr_grp);
-   esw->fdb_table.addr_grp = NULL;
+   if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) {
+   mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+   esw->fdb_table.legacy.addr_grp = NULL;
}
if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) {
mlx5_destroy_flow_table(esw->fdb_table.fdb);
@@ -533,20 +533,20 @@ out:
return err;
 }
 
-static void esw_destroy_fdb_table(struct mlx5_eswitch *esw)
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
 {
if (!esw->fdb_table.fdb)
return;
 
esw_debug(esw->dev, "Destroy FDB Table\n");
-   mlx5_destroy_flow_group(esw->fdb_table.promisc_grp);
-   mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp);
-   mlx5_destroy_flow_group(esw->fdb_table.addr_grp);
+   mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+   mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+   mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
mlx5_destroy_flow_table(esw->fdb_table.fdb);
esw->fdb_table.fdb = NULL;
-   esw->fdb_table.addr_grp = NULL;
-   esw->fdb_table.allmulti_grp = NULL;
-   esw->fdb_table.promisc_grp = NULL;
+   esw->fdb_table.legacy.addr_grp = NULL;
+   esw->fdb_table.legacy.allmulti_grp = NULL;
+   esw->fdb_table.legacy.promisc_grp = NULL;
 }
 
 /* E-Switch vport UC/MC lists management */
@@ -1540,7 +1540,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, 
int vpo

[PATCH net-next V2 00/16] Mellanox 100G SRIOV E-Switch offload and VF representors

2016-06-30 Thread Saeed Mahameed
d up with a few internal callbacks that should be implemented by a profile
instance. The profile for the conventional NIC is implemented, to preserve
the existing functionality.

The last two patches add e-switch registration API for the VF representors and 
the 
implementation of the VF representors netdevice profile. Being an mlx5e 
instance, the 
VF representor uses HW send/recv queues, completions queues and such. It 
currently doesn't 
support NIC offloads but some of them could be added later on. The VF 
representor has 
switchdev ops, where currently the only supported API is the one to get the HW ID,
which is needed to identify multiple representors belonging to the same 
e-switch.

The architecture + solution (software and firmware) work was done by a team 
consisting 
of Ilya Lesokhin, Haggai Eran, Rony Efraim, Tal Anker, Natan Oppenheimer, Saeed 
Mahameed, 
Hadar and Or, thank you all!

v1 --> v2 fixes:
* removed unneeded variable (patch #3)
* removed unused value DEVLINK_ESWITCH_MODE_NONE (patch #8)
* changed the devlink mode name from "offloads" to "switchdev" which
   better describes what we are referring to here, using a known concept (patch #8)
* correctly refer to devlink e-switch modes (patch #10)
* use the correct mlx5e way to define the VF rep statistics  (patch #16)

Thanks,
Or & Saeed.

Hadar Hen Zion (6):
  net/mlx5e: Create NIC global resources only once
  net/mlx5e: TIRs management refactoring
  net/mlx5e: Mark enabled RQTs instances explicitly
  net/mlx5e: Add support for multiple profiles
  net/mlx5: Add Representors registration API
  net/mlx5e: Introduce SRIOV VF representors

Or Gerlitz (10):
  net/mlx5: E-Switch, Add operational mode to the SRIOV e-Switch
  net/mlx5: E-Switch, Add support for the sriov offloads mode
  net/mlx5: E-Switch, Add miss rule for offloads mode
  net/mlx5: E-Switch, Add API to create send-to-vport rules
  net/mlx5: Introduce offloads steering namespace
  net/mlx5: E-Switch, Add offloads table
  net/mlx5: E-Switch, Add API to create vport rx rules
  net/devlink: Add E-Switch mode control
  net/mlx5: Add devlink interface
  net/mlx5e: Add devlink based SRIOV mode changes

 drivers/net/ethernet/mellanox/mlx5/core/Kconfig|   1 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   8 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  73 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  |  14 +-
 .../net/ethernet/mellanox/mlx5/core/en_common.c| 160 ++
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 627 -
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 394 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  90 +--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  78 ++-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 566 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  26 +-
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c|   5 +-
 include/linux/mlx5/driver.h|  13 +
 include/linux/mlx5/fs.h|   1 +
 include/net/devlink.h  |   3 +
 include/uapi/linux/devlink.h   |   8 +
 net/core/devlink.c |  87 +++
 20 files changed, 1840 insertions(+), 331 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

-- 
2.8.0



[PATCH net-next V3 13/16] net/mlx5e: Mark enabled RQTs instances explicitly

2016-07-01 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

In the current driver implementation two types of receive queue
tables (RQTs) are in use - direct and indirect.

Change the driver to mark each newly created RQT (direct or indirect)
as "enabled". This behaviour is needed for introducing new mlx5e
instances which serve to represent SRIOV VFs.

The VF representors will have only one type of RQTs (direct).

An "enabled" flag is added to each RQT to allow better handling
and code sharing between the representors and the nic netdevices.

This patch doesn't add any new functionality.
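As a rough illustration (not part of the patch), the "enabled" flag lets
shared helpers skip RQTs that a given mlx5e instance never created; the
helper below is only a sketch, not driver code:

	/* Sketch: destroy an RQT only if this instance actually created it,
	 * e.g. a VF representor that has no indirect RQT.
	 */
	static void example_rqt_safe_destroy(struct mlx5e_priv *priv,
					     struct mlx5e_rqt *rqt)
	{
		if (!rqt->enabled)
			return;

		rqt->enabled = false;
		mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
	}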

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Reviewed-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   | 13 +--
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 45 +-
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8dad50c..91c6bbe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -552,10 +552,15 @@ struct mlx5e_flow_steering {
struct mlx5e_arfs_tablesarfs;
 };
 
-struct mlx5e_tir {
-   u32  tirn;
+struct mlx5e_rqt {
u32  rqtn;
-   struct list_head list;
+   bool enabled;
+};
+
+struct mlx5e_tir {
+   u32   tirn;
+   struct mlx5e_rqt  rqt;
+   struct list_head  list;
 };
 
 enum {
@@ -576,7 +581,7 @@ struct mlx5e_priv {
 
struct mlx5e_channel **channel;
u32tisn[MLX5E_MAX_NUM_TC];
-   u32indir_rqtn;
+   struct mlx5e_rqt   indir_rqt;
struct mlx5e_tir   indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir   direct_tir[MLX5E_MAX_NUM_CHANNELS];
u32tx_rates[MLX5E_MAX_NUM_SQS];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 5b88967..7e61ffa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -898,7 +898,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 
*indir,
mutex_lock(&priv->state_lock);
 
if (indir) {
-   u32 rqtn = priv->indir_rqtn;
+   u32 rqtn = priv->indir_rqt.rqtn;
 
memcpy(priv->params.indirection_rqt, indir,
   sizeof(priv->params.indirection_rqt));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 30efa8a..7f1f1ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1486,7 +1486,8 @@ static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv 
*priv, void *rqtc,
MLX5_SET(rqtc, rqtc, rq_num[0], rqn);
 }
 
-static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn)
+static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz,
+   int ix, struct mlx5e_rqt *rqt)
 {
struct mlx5_core_dev *mdev = priv->mdev;
void *rqtc;
@@ -1509,34 +1510,37 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, 
int sz, int ix, u32 *rqtn)
else
mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix);
 
-   err = mlx5_core_create_rqt(mdev, in, inlen, rqtn);
+   err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn);
+   if (!err)
+   rqt->enabled = true;
 
kvfree(in);
return err;
 }
 
-static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn)
+static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt)
 {
-   mlx5_core_destroy_rqt(priv->mdev, rqtn);
+   rqt->enabled = false;
+   mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
 }
 
 static int mlx5e_create_rqts(struct mlx5e_priv *priv)
 {
int nch = mlx5e_get_max_num_channels(priv->mdev);
-   u32 *rqtn;
+   struct mlx5e_rqt *rqt;
int err;
int ix;
 
/* Indirect RQT */
-   rqtn = &priv->indir_rqtn;
-   err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn);
+   rqt = &priv->indir_rqt;
+   err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt);
if (err)
return err;
 
/* Direct RQTs */
for (ix = 0; ix < nch; ix++) {
-   rqtn = &priv->direct_tir[ix].rqtn;
-   err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn);
+   rqt = &priv->direct_tir[ix].rqt;
+   err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt);
if (err)
goto err_

[PATCH net-next V3 12/16] net/mlx5e: TIRs management refactoring

2016-07-01 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

The current refresh tirs self loopback mechanism refreshes all the tirs
belonging to the same mlx5e instance to prevent self loopback by packets
sent over any ring of that instance. This mechanism relies on all the
tirs/tises of an instance to be created with the same transport domain
number (tdn).

Change the driver to refresh all the tirs created under the same tdn
regardless of which mlx5e netdev instance they belong to.

This behaviour is needed for introducing new mlx5e instances which serve
to represent SRIOV VFs. The representors and the PF share the vport used for
E-Switch management, and we want to avoid NIC level HW loopback between
them, e.g when sending broadcast packets. To achieve that, both the
representors and the PF NIC will share the tdn.

This patch doesn't add any new functionality.
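A minimal sketch of the idea, assuming the core device keeps a list of all
TIRs created under its transport domain (the list location and helper names
here are illustrative, not necessarily the exact ones used by the driver):

	/* Sketch: refresh every TIR opened under this mdev's tdn, no matter
	 * which mlx5e netdev (PF NIC or VF representor) created it.
	 */
	int example_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
	{
		int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
		struct mlx5e_tir *tir;
		u32 *in;
		int err = 0;

		in = mlx5_vzalloc(inlen);
		if (!in)
			return -ENOMEM;

		MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);

		/* the list is assumed to be maintained by mlx5e_create_tir() */
		list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
			err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen);
			if (err)
				break;
		}

		kvfree(in);
		return err;
	}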

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Reviewed-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   | 12 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  | 14 +++---
 .../net/ethernet/mellanox/mlx5/core/en_common.c| 48 +++
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 56 +-
 6 files changed, 77 insertions(+), 57 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3226b92..8dad50c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -552,9 +552,10 @@ struct mlx5e_flow_steering {
struct mlx5e_arfs_tablesarfs;
 };
 
-struct mlx5e_direct_tir {
+struct mlx5e_tir {
u32  tirn;
u32  rqtn;
+   struct list_head list;
 };
 
 enum {
@@ -576,8 +577,8 @@ struct mlx5e_priv {
struct mlx5e_channel **channel;
u32tisn[MLX5E_MAX_NUM_TC];
u32indir_rqtn;
-   u32indir_tirn[MLX5E_NUM_INDIR_TIRS];
-   struct mlx5e_direct_tirdirect_tir[MLX5E_MAX_NUM_CHANNELS];
+   struct mlx5e_tir   indir_tir[MLX5E_NUM_INDIR_TIRS];
+   struct mlx5e_tir   direct_tir[MLX5E_MAX_NUM_CHANNELS];
u32tx_rates[MLX5E_MAX_NUM_SQS];
 
struct mlx5e_flow_steering fs;
@@ -784,7 +785,12 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const 
struct sk_buff *skb,
 #endif
 
 u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);
+int mlx5e_create_tir(struct mlx5_core_dev *mdev,
+struct mlx5e_tir *tir, u32 *in, int inlen);
+void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
+  struct mlx5e_tir *tir);
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
+int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev);
 
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 3515e78..10f18d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -93,14 +93,14 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type 
type)
 static int arfs_disable(struct mlx5e_priv *priv)
 {
struct mlx5_flow_destination dest;
-   u32 *tirn = priv->indir_tirn;
+   struct mlx5e_tir *tir = priv->indir_tir;
int err = 0;
int tt;
int i;
 
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
for (i = 0; i < ARFS_NUM_TYPES; i++) {
-   dest.tir_num = tirn[i];
+   dest.tir_num = tir[i].tirn;
tt = arfs_get_tt(i);
/* Modify ttc rules destination to bypass the aRFS tables*/
err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
@@ -176,7 +176,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
struct mlx5_flow_destination dest;
u8 match_criteria_enable = 0;
-   u32 *tirn = priv->indir_tirn;
+   struct mlx5e_tir *tir = priv->indir_tir;
u32 *match_criteria;
u32 *match_value;
int err = 0;
@@ -192,16 +192,16 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
switch (type) {
case ARFS_IPV4_TCP:
-   dest.tir_num = tirn[MLX5E_TT_IPV4_TCP];
+   dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn;
break;
case ARFS_IPV4_UDP:
-   dest.tir_num = tirn[MLX5E_TT_IPV4_UDP];
+   dest.tir_num = tir[MLX5E_TT_IPV4_U

[PATCH net-next V3 15/16] net/mlx5: Add Representors registration API

2016-07-01 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Introduce E-Switch registration/unregister representors functions.

Those functions are called by the mlx5e driver when the PF NIC is
created upon pci probe action regardless of the E-Switch mode (NONE,
LEGACY or OFFLOADS).

Adding a basic E-Switch database that will hold the vport representors
upon creation.

This patch doesn't add any new functionality.
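As a usage sketch (not part of this patch), an upper layer registers one
entry per vport; the load/unload callbacks of the entry only show up in
later patches of the series:

	/* Sketch: register a representor entry for vport 0 (e-switch manager). */
	static void example_register_uplink_rep(struct mlx5_eswitch *esw,
						void *priv)
	{
		struct mlx5_eswitch_rep rep;

		memset(&rep, 0, sizeof(rep));
		rep.vport = 0;		/* e-switch manager vport */
		rep.priv_data = priv;	/* e.g. the PF mlx5e_priv */

		/* the e-switch copies the entry into its per-vport database */
		mlx5_eswitch_register_vport_rep(esw, &rep);
	}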

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 60 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 10 
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 12 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 19 +++
 5 files changed, 97 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index edfc9be..081259a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -571,7 +571,7 @@ enum {
 struct mlx5e_profile {
void(*init)(struct mlx5_core_dev *mdev,
struct net_device *netdev,
-   const struct mlx5e_profile *profile);
+   const struct mlx5e_profile *profile, void *ppriv);
void(*cleanup)(struct mlx5e_priv *priv);
int (*init_rx)(struct mlx5e_priv *priv);
void(*cleanup_rx)(struct mlx5e_priv *priv);
@@ -618,6 +618,7 @@ struct mlx5e_priv {
struct mlx5e_tstamptstamp;
u16 q_counter;
const struct mlx5e_profile *profile;
+   void  *ppriv;
 };
 
 enum mlx5e_link_mode {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3e22c5e..2c9e458 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2880,7 +2880,8 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params 
*params, u8 cq_period_mode)
 
 static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
struct net_device *netdev,
-   const struct mlx5e_profile *profile)
+   const struct mlx5e_profile *profile,
+   void *ppriv)
 {
struct mlx5e_priv *priv = netdev_priv(netdev);
u32 link_speed = 0;
@@ -2962,6 +2963,7 @@ static void mlx5e_build_nic_netdev_priv(struct 
mlx5_core_dev *mdev,
priv->netdev   = netdev;
priv->params.num_channels  = profile->max_nch(mdev);
priv->profile  = profile;
+   priv->ppriv= ppriv;
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_ets_init(priv);
@@ -3126,18 +3128,25 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv 
*priv)
 
 static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
   struct net_device *netdev,
-  const struct mlx5e_profile *profile)
+  const struct mlx5e_profile *profile,
+  void *ppriv)
 {
struct mlx5e_priv *priv = netdev_priv(netdev);
 
-   mlx5e_build_nic_netdev_priv(mdev, netdev, profile);
+   mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
mlx5e_build_nic_netdev(netdev);
mlx5e_vxlan_init(priv);
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
+   struct mlx5_core_dev *mdev = priv->mdev;
+   struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
mlx5e_vxlan_cleanup(priv);
+
+   if (MLX5_CAP_GEN(mdev, vport_group_manager))
+   mlx5_eswitch_unregister_vport_rep(esw, 0);
 }
 
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
@@ -3229,6 +3238,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 {
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
+   struct mlx5_eswitch *esw = mdev->priv.eswitch;
+   struct mlx5_eswitch_rep rep;
 
if (mlx5e_vxlan_allowed(mdev)) {
rtnl_lock();
@@ -3238,6 +3249,12 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 
mlx5e_enable_async_events(priv);
queue_work(priv->wq, &priv->set_rx_mode_work);
+
+   if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+   rep.vport = 0;
+   rep.priv_data = priv;
+   mlx5_eswitch_register_vport_rep(esw, &rep);
+   }
 }
 
 static void mlx5e_nic_disable(struct mlx5e_priv *priv)
@@ -3261,7 +3278,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
 };
 
 static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
- 

Re: [PATCH net] net: poll tx timeout only on active tx queues

2016-07-01 Thread Saeed Mahameed
On Fri, Jul 1, 2016 at 8:18 AM, Eric Dumazet  wrote:
> On Fri, 2016-07-01 at 04:50 +, Yuval Mintz wrote:
>> > currently all the device driver call  netif_tx_start_all_queues(dev)
>> > on open to W/A this issue. which is strange since only
>> > real_num_tx_queues are active.
>>
>> You could also argue that netif_tx_start_all_queues() should
>> only enable the real_num_tx_queues.
>> [Although that would obviously cause all drivers to reach the
>> 'problem' you're currently fixing].
>
> Yep. Basically what I pointed out.
>
> It seems inconsistent to have loops using num_tx_queues, and others
> using real_num_tx_queues.
>
> Instead of 'fixing' one of them, we should take a deeper look, even if
> the change looks fine.
>
> num_tx_queues should be used in code that runs once, like
> netdev_lockdep_set_classes(), but other loops should probably use
> real_num_tx_queues.
>
> Anyway all these changes should definitely target net-next, not net
> tree.
>

Thank you Eric and Yuval,

Although I slightly disagree, this patch is good as is, even with the
inconsistency, which is there due to a bad design.
I don't see why new drivers need to keep copying from old wrong
implementations and workarounds.

But for the long term, you have a point.
We will consider a deeper fix for net-next as you suggested, and drop
this temporary fix.

Thanks
Saeed.


[PATCH net-next V3 04/16] net/mlx5: E-Switch, Add API to create send-to-vport rules

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add the API to create send-to-vport e-switch rules of the form

 packet meta-data :: send-queue-number == $SQN and source-vport == 0 --> $VPORT

These rules are to be used for a send-to-vport logic which conceptually bypasses
the "normal" steering rules currently present at the e-switch datapath.

Such a rule should apply only to packets that originate in the e-switch manager
vport (0) and are sent for a given SQN which is used by a given VF representor
device, and hence the matching logic.
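As a usage sketch (not part of this patch), a VF representor bringing up its
send queues would install one such rule per SQ, roughly along these lines:

	/* Sketch: map each of the representor's SQs to the represented vport. */
	static int example_add_sqs_fwd_rules(struct mlx5_eswitch *esw, int vport,
					     u32 *sqns, int num_sqns)
	{
		struct mlx5_flow_rule *rule;
		int i;

		for (i = 0; i < num_sqns; i++) {
			rule = mlx5_eswitch_add_send_to_vport_rule(esw, vport,
								   sqns[i]);
			if (IS_ERR(rule))
				return PTR_ERR(rule);
			/* a real caller keeps the rules so it can remove them
			 * later with mlx5_del_flow_rule()
			 */
		}

		return 0;
	}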

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  3 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 39 ++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 8eed33f..b7fabd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -193,6 +193,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 int vport,
 struct ifla_vf_stats *vf_stats);
+struct mlx5_flow_rule *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 
sqn);
 
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
@@ -204,5 +206,4 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 
 #define esw_debug(dev, format, ...)\
mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
-
 #endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e3d81ae..8964f71 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -38,6 +38,45 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 
+struct mlx5_flow_rule *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 
sqn)
+{
+   struct mlx5_flow_destination dest;
+   struct mlx5_flow_rule *flow_rule;
+   int match_header = MLX5_MATCH_MISC_PARAMETERS;
+   u32 *match_v, *match_c;
+   void *misc;
+
+   match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   if (!match_v || !match_c) {
+   esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
+   flow_rule = ERR_PTR(-ENOMEM);
+   goto out;
+   }
+
+   misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters);
+   MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
+   MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport 
is 0 */
+
+   misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
+   MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
+   MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+   dest.vport_num = vport;
+
+   flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, match_header, 
match_c,
+  match_v, 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+  0, &dest);
+   if (IS_ERR(flow_rule))
+   esw_warn(esw->dev, "FDB: Failed to add send to vport rule err 
%ld\n", PTR_ERR(flow_rule));
+out:
+   kfree(match_v);
+   kfree(match_c);
+   return flow_rule;
+}
+
 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 {
struct mlx5_flow_destination dest;
-- 
2.8.0



[PATCH net-next V3 01/16] net/mlx5: E-Switch, Add operational mode to the SRIOV e-Switch

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Define three modes for the SRIOV e-switch operation, none (SRIOV_NONE,
none of the VF vports are enabled), legacy (SRIOV_LEGACY, the current mode)
and sriov offloads (SRIOV_OFFLOADS). Currently, when in SRIOV, only the
legacy mode is supported, where steering rules are of the form:

destination mac --> VF vport

This patch does not change any functionality.
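Conceptually the new state boils down to the following sketch (values per
the description above, usage shown only as comments):

	/* Sketch of the three operational modes. */
	enum {
		SRIOV_NONE,	/* SRIOV disabled, no VF vports enabled */
		SRIOV_LEGACY,	/* current mode: DMAC --> VF vport rules */
		SRIOV_OFFLOADS,	/* rules programmed by higher-level SW */
	};

	/* The e-switch carries the mode, e.g.
	 *	esw->mode = SRIOV_NONE;                              on init
	 *	mlx5_eswitch_enable_sriov(esw, nvfs, SRIOV_LEGACY);  on VF enable
	 */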

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 51 +--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 19 +++--
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c   |  5 ++-
 3 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index aebbd6c..8068dde 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -428,7 +428,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, 
u32 vport)
return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
 }
 
-static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
+static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports)
 {
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_core_dev *dev = esw->dev;
@@ -479,7 +479,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, 
int nvports)
esw_warn(dev, "Failed to create flow group err(%d)\n", err);
goto out;
}
-   esw->fdb_table.addr_grp = g;
+   esw->fdb_table.legacy.addr_grp = g;
 
/* Allmulti group : One rule that forwards any mcast traffic */
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -494,7 +494,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, 
int nvports)
esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", 
err);
goto out;
}
-   esw->fdb_table.allmulti_grp = g;
+   esw->fdb_table.legacy.allmulti_grp = g;
 
/* Promiscuous group :
 * One rule that forward all unmatched traffic from previous groups
@@ -511,17 +511,17 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, 
int nvports)
esw_warn(dev, "Failed to create promisc flow group err(%d)\n", 
err);
goto out;
}
-   esw->fdb_table.promisc_grp = g;
+   esw->fdb_table.legacy.promisc_grp = g;
 
 out:
if (err) {
-   if (!IS_ERR_OR_NULL(esw->fdb_table.allmulti_grp)) {
-   mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp);
-   esw->fdb_table.allmulti_grp = NULL;
+   if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) {
+   
mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+   esw->fdb_table.legacy.allmulti_grp = NULL;
}
-   if (!IS_ERR_OR_NULL(esw->fdb_table.addr_grp)) {
-   mlx5_destroy_flow_group(esw->fdb_table.addr_grp);
-   esw->fdb_table.addr_grp = NULL;
+   if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) {
+   mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+   esw->fdb_table.legacy.addr_grp = NULL;
}
if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) {
mlx5_destroy_flow_table(esw->fdb_table.fdb);
@@ -533,20 +533,20 @@ out:
return err;
 }
 
-static void esw_destroy_fdb_table(struct mlx5_eswitch *esw)
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
 {
if (!esw->fdb_table.fdb)
return;
 
esw_debug(esw->dev, "Destroy FDB Table\n");
-   mlx5_destroy_flow_group(esw->fdb_table.promisc_grp);
-   mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp);
-   mlx5_destroy_flow_group(esw->fdb_table.addr_grp);
+   mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+   mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+   mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
mlx5_destroy_flow_table(esw->fdb_table.fdb);
esw->fdb_table.fdb = NULL;
-   esw->fdb_table.addr_grp = NULL;
-   esw->fdb_table.allmulti_grp = NULL;
-   esw->fdb_table.promisc_grp = NULL;
+   esw->fdb_table.legacy.addr_grp = NULL;
+   esw->fdb_table.legacy.allmulti_grp = NULL;
+   esw->fdb_table.legacy.promisc_grp = NULL;
 }
 
 /* E-Switch vport UC/MC lists management */
@@ -1540,7 +1540,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, 
int vpo

[PATCH net-next V3 00/16] Mellanox 100G SRIOV E-Switch offload and VF representors

2016-07-01 Thread Saeed Mahameed
d up 
with few internal callbacks that should be implemented by a profile instance. 
The profile 
for the conventional NIC is implemented, to preserve the existing functionality.

The last two patches add e-switch registration API for the VF representors and 
the 
implementation of the VF representors netdevice profile. Being an mlx5e 
instance, the 
VF representor uses HW send/recv queues, completion queues and such. It 
currently doesn't 
support NIC offloads but some of them could be added later on. The VF 
representor has 
switchdev ops, where currently the only supported API is the one to get the HW ID,
which is needed to identify multiple representors belonging to the same 
e-switch.

The architecture + solution (software and firmware) work was done by a team 
consisting 
of Ilya Lesokhin, Haggai Eran, Rony Efraim, Tal Anker, Natan Oppenheimer, Saeed 
Mahameed, 
Hadar and Or, thank you all!

v1 --> v2 fixes:
* removed unneeded variable (patch #3)
* removed unused value DEVLINK_ESWITCH_MODE_NONE (patch #8)
* changed the devlink mode name from "offloads" to "switchdev" which
   better describes what we are referring to here, using a known concept (patch #8)
* correctly refer to devlink e-switch modes (patch #10)
* use the correct mlx5e way to define the VF rep statistics  (patch #16)

v2 --> v3 fixes:
* Rebased on top of 6fde0e63eccb 'be2net: signedness bug in be_msix_enable()'
* Handled a compilation error introduced by the rebase on top of "f5074d0ce2f8 Merge 
branch 'mlx5-100G-fixes'"
* This series applies perfectly even with 'mlx5 resiliency and xmit path fixes' 
merged to net-next

Thanks,
Or & Saeed.


Hadar Hen Zion (6):
  net/mlx5e: Create NIC global resources only once
  net/mlx5e: TIRs management refactoring
  net/mlx5e: Mark enabled RQTs instances explicitly
  net/mlx5e: Add support for multiple profiles
  net/mlx5: Add Representors registration API
  net/mlx5e: Introduce SRIOV VF representors

Or Gerlitz (10):
  net/mlx5: E-Switch, Add operational mode to the SRIOV e-Switch
  net/mlx5: E-Switch, Add support for the sriov offloads mode
  net/mlx5: E-Switch, Add miss rule for offloads mode
  net/mlx5: E-Switch, Add API to create send-to-vport rules
  net/mlx5: Introduce offloads steering namespace
  net/mlx5: E-Switch, Add offloads table
  net/mlx5: E-Switch, Add API to create vport rx rules
  net/devlink: Add E-Switch mode control
  net/mlx5: Add devlink interface
  net/mlx5e: Add devlink based SRIOV mode changes

 drivers/net/ethernet/mellanox/mlx5/core/Kconfig|   1 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   8 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  73 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  |  14 +-
 .../net/ethernet/mellanox/mlx5/core/en_common.c| 160 ++
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 627 -
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 394 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  90 +--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  78 ++-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 566 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  26 +-
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c|   5 +-
 include/linux/mlx5/driver.h|  13 +
 include/linux/mlx5/fs.h|   1 +
 include/net/devlink.h  |   3 +
 include/uapi/linux/devlink.h   |   8 +
 net/core/devlink.c |  87 +++
 20 files changed, 1840 insertions(+), 331 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

-- 
2.8.0



[PATCH net-next V3 03/16] net/mlx5: E-Switch, Add miss rule for offloads mode

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

In the sriov offloads mode, packets that are not matched by any other
rule should be sent towards the e-switch manager for further processing.

Add such "miss" rule which matches ANY packet as the last rule in the
e-switch FDB and programs the HW to send the packet to vport 0 where
the e-switch manager runs.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  1 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 40 ++
 2 files changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 2360180..8eed33f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -144,6 +144,7 @@ struct mlx5_eswitch_fdb {
struct offloads_fdb {
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
+   struct mlx5_flow_rule  *miss_rule;
} offloads;
};
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index c6b28df..e3d81ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -38,6 +38,39 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 
+static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
+{
+   struct mlx5_flow_destination dest;
+   struct mlx5_flow_rule *flow_rule = NULL;
+   u32 *match_v, *match_c;
+   int err = 0;
+
+   match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   if (!match_v || !match_c) {
+   esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
+   err = -ENOMEM;
+   goto out;
+   }
+
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+   dest.vport_num = 0;
+
+   flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, 0, match_c, match_v,
+  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, 
&dest);
+   if (IS_ERR(flow_rule)) {
+   err = PTR_ERR(flow_rule);
+   esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err 
%d\n", err);
+   goto out;
+   }
+
+   esw->fdb_table.offloads.miss_rule = flow_rule;
+out:
+   kfree(match_v);
+   kfree(match_c);
+   return err;
+}
+
 #define MAX_PF_SQ 256
 
 int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
@@ -110,8 +143,14 @@ int esw_create_offloads_fdb_table(struct mlx5_eswitch 
*esw, int nvports)
}
esw->fdb_table.offloads.miss_grp = g;
 
+   err = esw_add_fdb_miss_rule(esw);
+   if (err)
+   goto miss_rule_err;
+
return 0;
 
+miss_rule_err:
+   mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 send_vport_err:
@@ -128,6 +167,7 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch 
*esw)
return;
 
esw_debug(esw->dev, "Destroy offloads FDB Table\n");
+   mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
-- 
2.8.0



[PATCH net-next V3 11/16] net/mlx5e: Create NIC global resources only once

2016-07-01 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

To allow creating more than one netdev over the same PCI function, we
change the driver such that global NIC resources are created once and
later shared amongst all the mlx5e netdevs running over that port.

Move the CQ UAR, PD (pdn), Transport Domain (tdn), MKey resources from
being kept in the mlx5e priv part to a new resources structure
(mlx5e_resources) placed under the mlx5_core device.

This patch doesn't add any new functionality.
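A sketch of what such a shared-resources structure can look like (the field
names here are illustrative; the authoritative layout is in the patch body):

	/* Sketch: per-PCI-function resources shared by all mlx5e netdevs. */
	struct example_mlx5e_resources {
		struct mlx5_uar		cq_uar;	/* UAR used by the CQs */
		u32			pdn;	/* protection domain */
		u32			tdn;	/* transport domain */
		struct mlx5_core_mkey	mkey;	/* MR covering driver memory */
	};

	/* Placed inside struct mlx5_core_dev, so a later mlx5e_create_netdev()
	 * call can reuse the resources instead of allocating its own copies.
	 */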

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Reviewed-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   6 +-
 .../net/ethernet/mellanox/mlx5/core/en_common.c| 112 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 124 +++--
 include/linux/mlx5/driver.h|  13 +++
 5 files changed, 171 insertions(+), 90 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 96f1826..9b14dad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,8 +6,8 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
fs_counters.o rl.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
-   en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
-   en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \
-   en_arfs.o
+   en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
+   en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
+   en_tc.o en_arfs.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index b97511b..3226b92 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -570,10 +570,6 @@ struct mlx5e_priv {
 
unsigned long  state;
struct mutex   state_lock; /* Protects Interface state */
-   struct mlx5_uarcq_uar;
-   u32pdn;
-   u32tdn;
-   struct mlx5_core_mkey  mkey;
struct mlx5_core_mkey  umr_mkey;
struct mlx5e_rqdrop_rq;
 
@@ -788,5 +784,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const 
struct sk_buff *skb,
 #endif
 
 u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
 
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
new file mode 100644
index 000..33b3732
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+
+/* mlx5e global resources should be placed in this file.
+ * Global resources are common to all the netdevices created on the same nic.
+ */
+
+static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+struct mlx5_core_mkey *mkey)
+{
+   struct m

[PATCH net-next V3 09/16] net/mlx5: Add devlink interface

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

The devlink interface is initially used to set/get the mode of the SRIOV 
e-switch.

Currently, these are only stubs for get/set, down-stream patch will actually
fill them out.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig|  1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  4 
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 +
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 26 ++
 4 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig 
b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 1cf722e..aae4688 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -4,6 +4,7 @@
 
 config MLX5_CORE
tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver"
+   depends on MAY_USE_DEVLINK
depends on PCI
default n
---help---
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index cf959f7..7843f98 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -35,6 +35,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 #define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -205,6 +206,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch 
*esw, int vport, u32 sqn
 struct mlx5_flow_rule *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 
tirn);
 
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
 #define esw_info(dev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 7aad367..e1727a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -328,3 +328,13 @@ out:
kfree(match_c);
return flow_rule;
 }
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+   return -EOPNOTSUPP;
+}
+
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+   return -EOPNOTSUPP;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 1f3b6d6..1fb3c68 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -51,6 +51,7 @@
 #ifdef CONFIG_RFS_ACCEL
 #include 
 #endif
+#include 
 #include "mlx5_core.h"
 #include "fs_core.h"
 #ifdef CONFIG_MLX5_CORE_EN
@@ -1315,19 +1316,28 @@ struct mlx5_core_event_handler {
  void *data);
 };
 
+static const struct devlink_ops mlx5_devlink_ops = {
+#ifdef CONFIG_MLX5_CORE_EN
+   .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
+   .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+#endif
+};
 
 static int init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
 {
struct mlx5_core_dev *dev;
+   struct devlink *devlink;
struct mlx5_priv *priv;
int err;
 
-   dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-   if (!dev) {
+   devlink = devlink_alloc(_devlink_ops, sizeof(*dev));
+   if (!devlink) {
dev_err(&pdev->dev, "kzalloc failed\n");
return -ENOMEM;
}
+
+   dev = devlink_priv(devlink);
priv = &dev->priv;
priv->pci_dev_data = id->driver_data;
 
@@ -1364,15 +1374,21 @@ static int init_one(struct pci_dev *pdev,
goto clean_health;
}
 
+   err = devlink_register(devlink, &pdev->dev);
+   if (err)
+   goto clean_load;
+
return 0;
 
+clean_load:
+   mlx5_unload_one(dev, priv);
 clean_health:
mlx5_health_cleanup(dev);
 close_pci:
mlx5_pci_close(dev, priv);
 clean_dev:
pci_set_drvdata(pdev, NULL);
-   kfree(dev);
+   devlink_free(devlink);
 
return err;
 }
@@ -1380,8 +1396,10 @@ clean_dev:
 static void remove_one(struct pci_dev *pdev)
 {
struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+   struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_priv *priv = &dev->priv;
 
+   devlink_unregister(devlink);
if (mlx5_unload_one(dev, priv)) {
dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
mlx5_health_cleanup(dev);
@@ -1390,7 +1408,7 @@ static void remove_one(struct pci_dev *pdev)
mlx5_health_cleanup(dev);
mlx5_pci_close(dev, priv);
pci_set_drvdata(p

[PATCH net-next V3 02/16] net/mlx5: E-Switch, Add support for the sriov offloads mode

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Unlike the legacy mode, here forwarding rules are not learned by the
driver from events on MACs set by VFs/VMs on their vports, but rather
should be programmed by higher-level SW entities.

Saying that, still, in the offloads mode (SRIOV_OFFLOADS), two flow
groups are created by the driver for management (slow path) purposes:

The first group will be used for sending packets over e-switch vports
from the host OS where the e-switch management code runs, to be
received by VFs.

The second group will be used by a miss rule which forwards packets toward
the e-switch manager. Further logic will trap these packets such that
the receiving net-device as seen by the networking stack is the representor
of the vport that sent the packet over the e-switch data-path.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  35 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  16 +++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 135 +
 4 files changed, 168 insertions(+), 20 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index c4f450f..96f1826 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -5,7 +5,7 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o
 
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \
en_arfs.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 8068dde..1fc4cfd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -40,17 +40,6 @@
 
 #define UPLINK_VPORT 0x
 
-#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
-
-#define esw_info(dev, format, ...) \
-   pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
-
-#define esw_warn(dev, format, ...) \
-   pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
-
-#define esw_debug(dev, format, ...)\
-   mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
-
 enum {
MLX5_ACTION_NONE = 0,
MLX5_ACTION_ADD  = 1,
@@ -92,6 +81,9 @@ enum {
MC_ADDR_CHANGE | \
PROMISC_CHANGE)
 
+int  esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports);
+void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw);
+
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
u32 events_mask)
 {
@@ -578,7 +570,8 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct 
vport_addr *vaddr)
if (err)
goto abort;
 
-   if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */
+   /* SRIOV is enabled: Forward UC MAC to vport */
+   if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY)
vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
 
esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n",
@@ -1543,7 +1536,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, 
int vport_num)
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
int err;
-   int i;
+   int i, enabled_events;
 
if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
@@ -1562,18 +1555,19 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, 
int nvfs, int mode)
esw_warn(esw->dev, "E-Switch engress ACL is not supported by 
FW\n");
 
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, 
mode);
-   if (mode != SRIOV_LEGACY)
-   return -EINVAL;
-
esw->mode = mode;
esw_disable_vport(esw, 0);
 
-   err = esw_create_legacy_fdb_table(esw, nvfs + 1);
+   if (mode == SRIOV_LEGACY)
+   err = esw_create_legacy_fdb_table(esw, nvfs + 1);
+   else
+   err = esw_create_offloads_fdb_table(esw, nvfs + 1);
if (err)
goto abort;
 
+   

[PATCH net-next V3 08/16] net/devlink: Add E-Switch mode control

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add the commands to set and show the mode of the SRIOV E-Switch; two modes
are supported:

* legacy: operating in the "old" L2 based mode (DMAC --> VF vport)

* switchdev: the E-Switch is referred to as a whitebox switch configured
using standard tools such as tc, bridge, openvswitch etc. To allow
working with the tools, for each VF, a VF representor netdevice is
created by the E-Switch manager vendor device driver instance (e.g PF).
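On the driver side this only requires filling two callbacks in devlink_ops;
a minimal sketch with hypothetical vendor names (a real driver would also
reprogram its hardware on a mode change):

	struct foo_dev {
		u16 eswitch_mode;
	};

	static int foo_eswitch_mode_get(struct devlink *devlink, u16 *mode)
	{
		struct foo_dev *fdev = devlink_priv(devlink);

		*mode = fdev->eswitch_mode;
		return 0;
	}

	static int foo_eswitch_mode_set(struct devlink *devlink, u16 mode)
	{
		struct foo_dev *fdev = devlink_priv(devlink);

		if (mode != DEVLINK_ESWITCH_MODE_LEGACY &&
		    mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
			return -EINVAL;

		fdev->eswitch_mode = mode;
		return 0;
	}

	static const struct devlink_ops foo_devlink_ops = {
		.eswitch_mode_get = foo_eswitch_mode_get,
		.eswitch_mode_set = foo_eswitch_mode_set,
	};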

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 include/net/devlink.h|  3 ++
 include/uapi/linux/devlink.h |  8 
 net/core/devlink.c   | 87 
 3 files changed, 98 insertions(+)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1d45b61..c99ffe8 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -90,6 +90,9 @@ struct devlink_ops {
   u16 tc_index,
   enum devlink_sb_pool_type pool_type,
   u32 *p_cur, u32 *p_max);
+
+   int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode);
+   int (*eswitch_mode_set)(struct devlink *devlink, u16 mode);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ba0073b..915bfa7 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -57,6 +57,8 @@ enum devlink_command {
DEVLINK_CMD_SB_OCC_SNAPSHOT,
DEVLINK_CMD_SB_OCC_MAX_CLEAR,
 
+   DEVLINK_CMD_ESWITCH_MODE_GET,
+   DEVLINK_CMD_ESWITCH_MODE_SET,
/* add new commands above here */
 
__DEVLINK_CMD_MAX,
@@ -95,6 +97,11 @@ enum devlink_sb_threshold_type {
 
 #define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20
 
+enum devlink_eswitch_mode {
+   DEVLINK_ESWITCH_MODE_LEGACY,
+   DEVLINK_ESWITCH_MODE_SWITCHDEV,
+};
+
 enum devlink_attr {
/* don't change the order or add anything between, this is ABI! */
DEVLINK_ATTR_UNSPEC,
@@ -125,6 +132,7 @@ enum devlink_attr {
DEVLINK_ATTR_SB_TC_INDEX,   /* u16 */
DEVLINK_ATTR_SB_OCC_CUR,/* u32 */
DEVLINK_ATTR_SB_OCC_MAX,/* u32 */
+   DEVLINK_ATTR_ESWITCH_MODE,  /* u16 */
 
/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 933e8d4..b2e592a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1394,6 +1394,78 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct 
sk_buff *skb,
return -EOPNOTSUPP;
 }
 
+static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
+   enum devlink_command cmd, u32 portid,
+   u32 seq, int flags, u16 mode)
+{
+   void *hdr;
+
+   hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+   if (!hdr)
+   return -EMSGSIZE;
+
+   if (devlink_nl_put_handle(msg, devlink))
+   goto nla_put_failure;
+
+   if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode))
+   goto nla_put_failure;
+
+   genlmsg_end(msg, hdr);
+   return 0;
+
+nla_put_failure:
+   genlmsg_cancel(msg, hdr);
+   return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
+   struct genl_info *info)
+{
+   struct devlink *devlink = info->user_ptr[0];
+   const struct devlink_ops *ops = devlink->ops;
+   struct sk_buff *msg;
+   u16 mode;
+   int err;
+
+   if (!ops || !ops->eswitch_mode_get)
+   return -EOPNOTSUPP;
+
+   err = ops->eswitch_mode_get(devlink, &mode);
+   if (err)
+   return err;
+
+   msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+   if (!msg)
+   return -ENOMEM;
+
+   err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET,
+  info->snd_portid, info->snd_seq, 0, mode);
+
+   if (err) {
+   nlmsg_free(msg);
+   return err;
+   }
+
+   return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb,
+   struct genl_info *info)
+{
+   struct devlink *devlink = info->user_ptr[0];
+   const struct devlink_ops *ops = devlink->ops;
+   u16 mode;
+
+   if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE])
+   return -EINVAL;
+
+   mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+
+   if (ops && ops->eswitch_mode_set)
+   return ops->eswitch_mode_set(devlink, mode);
+   return -EOPNOTSUPP;

[PATCH net-next V3 16/16] net/mlx5e: Introduce SRIOV VF representors

2016-07-01 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Implement the relevant profile functions to create an mlx5e driver instance
serving as a VF representor. When SRIOV offloads mode is enabled, each VF
will have a representor netdevice instance on the host.

To do that, we also export a set of shared service functions from en_main.c,
such that they can be used by both the NIC and the representor netdevs.

The newly created representor netdevice has a basic set of net_device_ops
which are the same ndo functions as the NIC netdevice and an ndo of its
own for phys port name.

The profiling infrastructure allows sharing code between the NIC and the
vport representor even though the representor has only a subset of the
NIC functionality.

The VF reps and the PF, which is used in that mode to represent the uplink,
expose switchdev ops. Currently the only op supported is attr get for the
port parent ID, which here serves to identify net-devices belonging to the
same HW E-Switch. Other than that, no offloading is implemented and hence
switching functionality is achieved if one sets SW switching rules, e.g.
using tc, bridge or ovs.

Port phys name (ndo_get_phys_port_name) is implemented to allow exporting
to user-space the VF vport number; along with the switchdev port parent
id (phys_switch_id), this enables a udev-based consistent naming scheme:

SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="", \
ATTR{phys_port_name}!="", NAME="$PF_NIC$attr{phys_port_name}"

where phys_switch_id is exposed by the PF (and VF reps) and $PF_NIC is
the name of the PF netdevice.
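For completeness, the representor side of that scheme only needs a small
ndo_get_phys_port_name implementation; a sketch (the exact string format,
and how the rep is reached from the netdev, are illustrative here):

	/* Sketch: expose the VF vport as the phys port name, so the udev rule
	 * above can derive a stable representor name from the PF name.
	 */
	static int example_rep_get_phys_port_name(struct net_device *dev,
						  char *buf, size_t len)
	{
		struct mlx5e_priv *priv = netdev_priv(dev);
		struct mlx5_eswitch_rep *rep = priv->ppriv;
		int ret;

		ret = snprintf(buf, len, "%d", rep->vport);
		if (ret >= len)
			return -EOPNOTSUPP;

		return 0;
	}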

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  28 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  53 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 394 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  20 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  96 -
 6 files changed, 574 insertions(+), 19 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 9b14dad..a574dea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -8,6 +8,6 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
-   en_tc.o en_arfs.o
+   en_tc.o en_arfs.o en_rep.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 081259a..00643a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -44,6 +44,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "wq.h"
 #include "mlx5_core.h"
 #include "en_stats.h"
@@ -816,4 +817,31 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev 
*mdev);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
 int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev);
 
+struct mlx5_eswitch_rep;
+int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
+struct mlx5_eswitch_rep *rep);
+void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
+   struct mlx5_eswitch_rep *rep);
+int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep);
+void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep);
+int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
+int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
+
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
+void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
+int mlx5e_create_tises(struct mlx5e_priv *priv);
+void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
+int mlx5e_close(struct net_device *netdev);
+int mlx5e_open(struct net_device *netdev);
+void mlx5e_update_stats_work(struct work_struct *work);
+void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile, void *ppriv);
+void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
+struct rtnl_link_stats64 *
+mlx5e_get_stats(struct n

[PATCH net-next V3 14/16] net/mlx5e: Add support for multiple profiles

2016-07-01 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

To allow support in representor netdevices where we create more than one
netdevice per NIC, add profiles to the mlx5e driver. The profiling
allows for creation of mlx5e instances with different characteristics.

Each profile implements its own behavior using set of function pointers
defined in struct mlx5e_profile. This is done to avoid complex
per-profile branching in the code.

Currently only the profile for the conventional NIC is implemented,
which is of use when a netdev is created upon pci probe.

This patch doesn't add any new functionality.
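The pattern is the usual ops-structure one; as a sketch, the conventional
NIC profile bundles its callbacks like this (the callback names follow the
functions touched by this series, the struct instance name is illustrative):

	static const struct mlx5e_profile example_nic_profile = {
		.init		= mlx5e_nic_init,
		.cleanup	= mlx5e_nic_cleanup,
		.init_rx	= mlx5e_init_nic_rx,
		.cleanup_rx	= mlx5e_cleanup_nic_rx,
		.init_tx	= mlx5e_init_nic_tx,
		.cleanup_tx	= mlx5e_cleanup_nic_tx,
		.enable		= mlx5e_nic_enable,
		.disable	= mlx5e_nic_disable,
		.update_stats	= mlx5e_update_stats,
		.max_nch	= mlx5e_get_max_num_channels,
		.max_tc		= MLX5E_MAX_NUM_TC,
	};

	/* netdev creation then just takes a profile pointer, e.g.
	 *	mlx5e_create_netdev(mdev, &example_nic_profile, ...);
	 */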

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  17 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 341 ++
 2 files changed, 240 insertions(+), 118 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 91c6bbe..edfc9be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -568,6 +568,22 @@ enum {
MLX5E_NIC_PRIO
 };
 
+struct mlx5e_profile {
+   void(*init)(struct mlx5_core_dev *mdev,
+   struct net_device *netdev,
+   const struct mlx5e_profile *profile);
+   void(*cleanup)(struct mlx5e_priv *priv);
+   int (*init_rx)(struct mlx5e_priv *priv);
+   void(*cleanup_rx)(struct mlx5e_priv *priv);
+   int (*init_tx)(struct mlx5e_priv *priv);
+   void(*cleanup_tx)(struct mlx5e_priv *priv);
+   void(*enable)(struct mlx5e_priv *priv);
+   void(*disable)(struct mlx5e_priv *priv);
+   void(*update_stats)(struct mlx5e_priv *priv);
+   int (*max_nch)(struct mlx5_core_dev *mdev);
+   int max_tc;
+};
+
 struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_sq**txq_to_sq_map;
@@ -601,6 +617,7 @@ struct mlx5e_priv {
struct mlx5e_stats stats;
struct mlx5e_tstamptstamp;
u16 q_counter;
+   const struct mlx5e_profile *profile;
 };
 
 enum mlx5e_link_mode {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7f1f1ec..3e22c5e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -233,7 +233,7 @@ static void mlx5e_update_stats_work(struct work_struct 
*work)
   update_stats_work);
mutex_lock(>state_lock);
if (test_bit(MLX5E_STATE_OPENED, >state)) {
-   mlx5e_update_stats(priv);
+   priv->profile->update_stats(priv);
queue_delayed_work(priv->wq, dwork,
   
msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL));
}
@@ -1036,7 +1036,7 @@ static void mlx5e_build_channeltc_to_txq_map(struct 
mlx5e_priv *priv, int ix)
 {
int i;
 
-   for (i = 0; i < MLX5E_MAX_NUM_TC; i++)
+   for (i = 0; i < priv->profile->max_tc; i++)
priv->channeltc_to_txq_map[ix][i] =
ix + i * priv->params.num_channels;
 }
@@ -1524,21 +1524,20 @@ static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, 
struct mlx5e_rqt *rqt)
mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
 }
 
-static int mlx5e_create_rqts(struct mlx5e_priv *priv)
+static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv)
+{
+   struct mlx5e_rqt *rqt = >indir_rqt;
+
+   return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt);
+}
+
+static int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
 {
-   int nch = mlx5e_get_max_num_channels(priv->mdev);
struct mlx5e_rqt *rqt;
int err;
int ix;
 
-   /* Indirect RQT */
-   rqt = >indir_rqt;
-   err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt);
-   if (err)
-   return err;
-
-   /* Direct RQTs */
-   for (ix = 0; ix < nch; ix++) {
+   for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
rqt = >direct_tir[ix].rqt;
err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt);
if (err)
@@ -1551,22 +1550,9 @@ err_destroy_rqts:
for (ix--; ix >= 0; ix--)
mlx5e_destroy_rqt(priv, >direct_tir[ix].rqt);
 
-   mlx5e_destroy_rqt(priv, >indir_rqt);
-
return err;
 }
 
-static void mlx5e_destroy_rqts(struct mlx5e_priv *priv)
-{
-   int nch = mlx5e_get_max_num_channels(priv->mdev);
-   int i;
-
-   for (i = 0; i < nch; i++)
-   mlx5e_destroy_rqt(priv, >direct_tir[i].rqt);
-
-   mlx5e_destroy_rqt(priv, >indir_rqt);
-}
-
 int mlx5e_redirect_rqt(struct mlx5e_pri

[PATCH net-next V3 07/16] net/mlx5: E-Switch, Add API to create vport rx rules

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add the API to create vport rx rules of the form

packet meta-data :: vport == $VPORT --> $TIR

where the TIR is opened by this VF representor.

This logic will be used for packets that didn't match any rule in the
e-switch datapath and should be received into the host OS through the
netdevice that represents the VF they were sent from.
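
As a usage sketch, a VF representor would typically install its rule when it
is loaded. The helper name, the rep->vport and vport_rx_rule fields, and the
choice of priv->direct_tir[0].tirn as the target TIR are assumptions of this
sketch, not part of this patch:

static int mlx5e_rep_add_rx_rule(struct mlx5_eswitch *esw,
				 struct mlx5_eswitch_rep *rep,
				 struct mlx5e_priv *priv)
{
	struct mlx5_flow_rule *flow_rule;

	/* steer e-switch miss traffic from this vport into the rep's TIR */
	flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport,
						      priv->direct_tir[0].tirn);
	if (IS_ERR(flow_rule))
		return PTR_ERR(flow_rule);

	rep->vport_rx_rule = flow_rule;
	return 0;
}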

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  4 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 85 ++
 2 files changed, 89 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 32db37a..cf959f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -157,6 +157,7 @@ enum {
 
 struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
+   struct mlx5_flow_group *vport_rx_group;
 };
 
 struct mlx5_eswitch {
@@ -201,6 +202,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 struct mlx5_flow_rule *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 
sqn);
 
+struct mlx5_flow_rule *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 
tirn);
+
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
 #define esw_info(dev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e895c6f..7aad367 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -243,3 +243,88 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch 
*esw)
 
mlx5_destroy_flow_table(offloads->ft_offloads);
 }
+
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+{
+   int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+   struct mlx5_flow_group *g;
+   struct mlx5_priv *priv = >dev->priv;
+   u32 *flow_group_in;
+   void *match_criteria, *misc;
+   int err = 0;
+   int nvports = priv->sriov.num_vfs + 2;
+
+   flow_group_in = mlx5_vzalloc(inlen);
+   if (!flow_group_in)
+   return -ENOMEM;
+
+   /* create vport rx group */
+   memset(flow_group_in, 0, inlen);
+   MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+MLX5_MATCH_MISC_PARAMETERS);
+
+   match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+   misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
+   MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+   MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+   MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
+
+   g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+   if (IS_ERR(g)) {
+   err = PTR_ERR(g);
+   mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
+   goto out;
+   }
+
+   esw->offloads.vport_rx_group = g;
+out:
+   kfree(flow_group_in);
+   return err;
+}
+
+static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
+{
+   mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
+}
+
+struct mlx5_flow_rule *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 
tirn)
+{
+   struct mlx5_flow_destination dest;
+   struct mlx5_flow_rule *flow_rule;
+   int match_header = MLX5_MATCH_MISC_PARAMETERS;
+   u32 *match_v, *match_c;
+   void *misc;
+
+   match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+   if (!match_v || !match_c) {
+   esw_warn(esw->dev, "Failed to alloc match parameters\n");
+   flow_rule = ERR_PTR(-ENOMEM);
+   goto out;
+   }
+
+   misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters);
+   MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+   misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
+   MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+   dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+   dest.tir_num = tirn;
+
+   flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, match_header, match_c,
+  match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+  0, &dest);
+   if (IS_ERR(flow_rule)) {
+   esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
+

[PATCH net-next V3 06/16] net/mlx5: E-Switch, Add offloads table

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add an offloads flow table. It belongs to the NIC offloads name-space and is
used as part of the SRIOV offloads logic to steer packets that hit the
e-switch miss rule to the TIR of the relevant VF representor.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  5 
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 31 ++
 2 files changed, 36 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b7fabd1..32db37a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -155,6 +155,10 @@ enum {
SRIOV_OFFLOADS
 };
 
+struct mlx5_esw_offload {
+   struct mlx5_flow_table *ft_offloads;
+};
+
 struct mlx5_eswitch {
struct mlx5_core_dev*dev;
struct mlx5_l2_tablel2_table;
@@ -169,6 +173,7 @@ struct mlx5_eswitch {
 */
struct mutexstate_lock;
struct esw_mc_addr  *mc_promisc;
+   struct mlx5_esw_offload offloads;
int mode;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 8964f71..e895c6f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -212,3 +212,34 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch 
*esw)
 
mlx5_destroy_flow_table(esw->fdb_table.fdb);
 }
+
+static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+{
+   struct mlx5_flow_namespace *ns;
+   struct mlx5_flow_table *ft_offloads;
+   struct mlx5_core_dev *dev = esw->dev;
+   int err = 0;
+
+   ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+   if (!ns) {
+   esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
+   return -ENOMEM;
+   }
+
+   ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0);
+   if (IS_ERR(ft_offloads)) {
+   err = PTR_ERR(ft_offloads);
+   esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
+   return err;
+   }
+
+   esw->offloads.ft_offloads = ft_offloads;
+   return 0;
+}
+
+static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
+{
+   struct mlx5_esw_offload *offloads = >offloads;
+
+   mlx5_destroy_flow_table(offloads->ft_offloads);
+}
-- 
2.8.0



[PATCH net-next V3 05/16] net/mlx5: Introduce offloads steering namespace

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Add a new namespace (MLX5_FLOW_NAMESPACE_OFFLOADS) to be populated
with flow steering rules that have to be executed before the EN NIC
steering rules are matched.

The namespace is located after the bypass name-space and before the
kernel name-space. Therefore, it precedes the HW processing done for
rules set for the kernel NIC name-space.

Under SRIOV, it would allow us to match on e-switch missed packet
and forward them to the relevant VF representor TIR.
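
A consumer of the new name-space (such as the offloads table added elsewhere
in this series) would obtain it and create its flow table roughly as follows.
This is only a sketch; the function name and the nvports sizing are
illustrative assumptions:

static int esw_create_offloads_table_sketch(struct mlx5_core_dev *dev, int nvports)
{
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;

	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
	if (!ns)
		return -ENOMEM;

	/* one table at priority 0, sized to hold a rule per vport */
	ft = mlx5_create_flow_table(ns, 0, nvports, 0);
	if (IS_ERR(ft))
		return PTR_ERR(ft);

	/* keep ft around for later rule insertion */
	return 0;
}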

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Amir Vadai <a...@vadai.me>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 11 ++-
 include/linux/mlx5/fs.h   |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e912a3d..b040110 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -83,6 +83,11 @@
 #define ANCHOR_NUM_LEVELS 1
 #define ANCHOR_NUM_PRIOS 1
 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
+
+#define OFFLOADS_MAX_FT 1
+#define OFFLOADS_NUM_PRIOS 1
+#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
+
 struct node_caps {
size_t  arr_sz;
long*caps;
@@ -98,7 +103,7 @@ static struct init_tree_node {
int num_levels;
 } root_fs = {
.type = FS_TYPE_NAMESPACE,
-   .ar_size = 4,
+   .ar_size = 5,
.children = (struct init_tree_node[]) {
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
 
FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
@@ -107,6 +112,9 @@ static struct init_tree_node {
  
FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
 ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
  BY_PASS_PRIO_NUM_LEVELS))),
+   ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
+ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))),
+
ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
 ADD_NS(ADD_MULTIPLE_PRIO(1, 1),
ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
@@ -1369,6 +1377,7 @@ struct mlx5_flow_namespace 
*mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 
switch (type) {
case MLX5_FLOW_NAMESPACE_BYPASS:
+   case MLX5_FLOW_NAMESPACE_OFFLOADS:
case MLX5_FLOW_NAMESPACE_KERNEL:
case MLX5_FLOW_NAMESPACE_LEFTOVERS:
case MLX5_FLOW_NAMESPACE_ANCHOR:
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 4b7a107..6ad1119 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority,
 
 enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_BYPASS,
+   MLX5_FLOW_NAMESPACE_OFFLOADS,
MLX5_FLOW_NAMESPACE_KERNEL,
MLX5_FLOW_NAMESPACE_LEFTOVERS,
MLX5_FLOW_NAMESPACE_ANCHOR,
-- 
2.8.0



[PATCH net-next V3 10/16] net/mlx5e: Add devlink based SRIOV mode changes

2016-07-01 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Implement handlers for the devlink commands to get and set the SRIOV
E-Switch mode.

When turning to the switchdev/offloads mode, we disable the e-switch
and enable it again in the new mode, create the NIC offloads table
and create VF reps.

When turning to legacy mode, we remove the VF reps and the offloads
table, and re-initiate the e-switch in its legacy mode.

The actual creation/removal of the VF reps is done in downstream patches.
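
A rough sketch of how the devlink glue might dispatch into the e-switch code;
the handler name, the exact devlink_ops signature and the esw_offloads_stop()
counterpart are assumptions, not taken from this patch:

/* sketch only: handler name, signature and esw_offloads_stop() are assumed */
static int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
{
	struct mlx5_core_dev *dev = devlink_priv(devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;

	switch (mode) {
	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
		return esw_offloads_start(esw);	/* legacy -> offloads */
	case DEVLINK_ESWITCH_MODE_LEGACY:
		return esw_offloads_stop(esw);	/* offloads -> legacy */
	default:
		return -EINVAL;
	}
}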

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  12 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 121 -
 2 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 1fc4cfd..12f509c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -81,8 +81,8 @@ enum {
MC_ADDR_CHANGE | \
PROMISC_CHANGE)
 
-int  esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports);
-void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw);
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
 
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
u32 events_mask)
@@ -1561,7 +1561,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, 
int nvfs, int mode)
if (mode == SRIOV_LEGACY)
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
else
-   err = esw_create_offloads_fdb_table(esw, nvfs + 1);
+   err = esw_offloads_init(esw, nvfs + 1);
if (err)
goto abort;
 
@@ -1581,6 +1581,7 @@ abort:
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 {
struct esw_mc_addr *mc_promisc;
+   int nvports;
int i;
 
if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
@@ -1591,6 +1592,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 esw->enabled_vports, esw->mode);
 
mc_promisc = esw->mc_promisc;
+   nvports = esw->enabled_vports;
 
for (i = 0; i < esw->total_vports; i++)
esw_disable_vport(esw, i);
@@ -1600,8 +1602,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 
if (esw->mode == SRIOV_LEGACY)
esw_destroy_legacy_fdb_table(esw);
-   else
-   esw_destroy_offloads_fdb_table(esw);
+   else if (esw->mode == SRIOV_OFFLOADS)
+   esw_offloads_cleanup(esw, nvports);
 
esw->mode = SRIOV_NONE;
/* VPORT 0 (PF) must be enabled back with non-sriov configuration */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e1727a9..312b6f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -112,7 +112,7 @@ out:
 
 #define MAX_PF_SQ 256
 
-int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
+static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 {
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_core_dev *dev = esw->dev;
@@ -200,7 +200,7 @@ ns_err:
return err;
 }
 
-void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
 {
if (!esw->fdb_table.fdb)
return;
@@ -329,12 +329,125 @@ out:
return flow_rule;
 }
 
+static int esw_offloads_start(struct mlx5_eswitch *esw)
+{
+   int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+
+   if (esw->mode != SRIOV_LEGACY) {
+   esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n");
+   return -EINVAL;
+   }
+
+   mlx5_eswitch_disable_sriov(esw);
+   err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+   if (err)
+   esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err);
+   return err;
+}
+
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
+{
+   int err;
+
+   err = esw_create_offloads_fdb_table(esw, nvports);
+   if (err)
+   return err;
+
+   err = esw_create_offloads_table(esw);
+   if (err)
+   goto create_ft_err;
+
+   err = esw_create_vport_rx_group(esw);
+   if (err)
+   goto create_fg_err;
+
+   return 0;
+
+create_fg_err:
+   esw_destroy_offloads_table(esw);
+
+create_ft_err:
+   esw_destroy_offloads_f

Re: [PATCH net 00/13] Mellanox 100G mlx5 resiliency and xmit path fixes

2016-07-01 Thread Saeed Mahameed
On Fri, Jul 1, 2016 at 1:14 PM, David Miller <da...@davemloft.net> wrote:
> From: Saeed Mahameed <sae...@mellanox.com>
> Date: Thu, 30 Jun 2016 17:34:37 +0300
>
>> This series provides two set of fixes to the mlx5 driver:
>>   - Resiliency fixes for reset flow and internal pci errors
>>   - xmit path fixes
>
> Series applied to 'net' but expecting all of this to be backported
> to -stable is unreasonable.
>

Thanks Dave,

One small comment on this series is that it will hit two trivial
conflicts once net is merged into current net-next.

Conflict applying: "net/mlx5e: Timeout if SQ doesn't flush during close":
Fix:
 ---
@@@ -810,12 -802,19 +820,19 @@@ static void mlx5e_close_sq(struct mlx5e
if (mlx5e_sq_has_room_for(sq, 1))
mlx5e_send_nop(sq, true);

-   mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR,
-   false, 0);
 -  err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
 -MLX5_SQC_STATE_ERR);

++  err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
MLX5_SQC_STATE_ERR,
++false, 0);
+   if (err)
+   set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state);

---

Conflict applying: "net/mlx5e: Handle RQ flush in error cases"
Fix:

---
diff --cc drivers/net/ethernet/mellanox/mlx5/core/en.h
index 6db979e,b429591..000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h

@@@ -214,7 -191,7 +214,8 @@@ struct mlx5e_tstamp
  enum {
MLX5E_RQ_STATE_POST_WQES_ENABLE,
MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
 +MLX5E_RQ_STATE_AM,
+ MLX5E_RQ_STATE_FLUSH_TIMEOUT,
  };

---

Thanks,
Saeed

