[PATCH net-next V2 01/10] net/mlx5: Store counters in rbtree instead of list
From: Amir Vadai <a...@vadai.me> In order to use bulk counters, we need to have counters sorted by id. Signed-off-by: Amir Vadai <a...@vadai.me> Reviewed-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 64 ++ include/linux/mlx5/driver.h| 2 +- 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index d7ba91a..9cffb6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -111,6 +111,7 @@ struct mlx5_fc_cache { }; struct mlx5_fc { + struct rb_node node; struct list_head list; /* last{packets,bytes} members are used when calculating the delta since diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 164dc37..aaf8fd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" @@ -68,6 +69,27 @@ * elapsed, the thread will actually query the hardware. */ +static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) +{ + struct rb_node **new = >rb_node; + struct rb_node *parent = NULL; + + while (*new) { + struct mlx5_fc *this = container_of(*new, struct mlx5_fc, node); + int result = counter->id - this->id; + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + /* Add new node and rebalance tree. */ + rb_link_node(>node, parent, new); + rb_insert_color(>node, root); +} + static void mlx5_fc_stats_work(struct work_struct *work) { struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, @@ -75,25 +97,35 @@ static void mlx5_fc_stats_work(struct work_struct *work) struct mlx5_fc_stats *fc_stats = >priv.fc_stats; unsigned long now = jiffies; struct mlx5_fc *counter; - struct mlx5_fc *tmp; + struct rb_node *node; + LIST_HEAD(tmplist); int err = 0; spin_lock(_stats->addlist_lock); - list_splice_tail_init(_stats->addlist, _stats->list); + list_splice_tail_init(_stats->addlist, ); - if (!list_empty(_stats->list)) + if (!list_empty() || !RB_EMPTY_ROOT(_stats->counters)) queue_delayed_work(fc_stats->wq, _stats->work, MLX5_FC_STATS_PERIOD); spin_unlock(_stats->addlist_lock); - list_for_each_entry_safe(counter, tmp, _stats->list, list) { - struct mlx5_fc_cache *c = >cache; + list_for_each_entry(counter, , list) + mlx5_fc_stats_insert(_stats->counters, counter); + + node = rb_first(_stats->counters); + while (node) { + struct mlx5_fc_cache *c; u64 packets; u64 bytes; + counter = rb_entry(node, struct mlx5_fc, node); + c = >cache; + + node = rb_next(node); + if (counter->deleted) { - list_del(>list); + rb_erase(>node, _stats->counters); mlx5_cmd_fc_free(dev, counter->id); @@ -176,7 +208,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = >priv.fc_stats; - INIT_LIST_HEAD(_stats->list); + fc_stats->counters = RB_ROOT; INIT_LIST_HEAD(_stats->addlist); spin_lock_init(_stats->addlist_lock); @@ -194,20 +226,32 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) struct mlx5_fc_stats *fc_stats = >priv.fc_stats; struct mlx5_fc *counter; struct mlx5_fc *tmp; + struct rb_node *node; cancel_delayed_work_sync(>priv.fc_stats.work); destroy_workqueue(dev->priv.fc_stats.wq); dev->priv.fc_stats.wq = NULL; - list_splice_tail_init(_stats->addlist, _stats->list); - - list_for_each_entry_safe(counter, tmp, _stats->list, list) { + list_for_each_entry_safe(counter, tmp, _stats->addlist, list) { list_del(>list); mlx5_cmd_fc_free(dev, counter->id); kfree(counter); } + + node = rb_first(_stats->counters); + while (node) { + counter = rb_entry(node,
[PATCH net-next V2 02/10] net/mlx5: Introduce bulk reading of flow counters
From: Amir Vadai <a...@vadai.me> This commit utilize the ability of ConnectX-4 to bulk read flow counters. Few bulk counter queries could be done instead of issuing thousands of firmware commands per second to get statistics of all flows set to HW, such as those programmed when we offload tc filters. Counters are stored sorted by hardware id, and queried in blocks (id + number of counters). Due to hardware requirement, start of block and number of counters in a block must be four aligned. Reviewed-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Amir Vadai <a...@vadai.me> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 67 + drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 12 .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 83 -- include/linux/mlx5/mlx5_ifc.h | 8 ++- 4 files changed, 146 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index a5bb6b6..9134010 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -413,3 +413,70 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, return 0; } + +struct mlx5_cmd_fc_bulk { + u16 id; + int num; + int outlen; + u32 out[0]; +}; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) +{ + struct mlx5_cmd_fc_bulk *b; + int outlen = sizeof(*b) + + MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * num; + + b = kzalloc(outlen, GFP_KERNEL); + if (!b) + return NULL; + + b->id = id; + b->num = num; + b->outlen = outlen; + + return b; +} + +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) +{ + kfree(b); +} + +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) +{ + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_flow_counter_in, in, opcode, +MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + b->out, b->outlen); +} + +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u16 id, + u64 *packets, u64 *bytes) +{ + int index = id - b->id; + void *stats; + + if (index < 0 || index >= b->num) { + mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n", + id, b->id, b->id + b->num - 1); + return; + } + + stats = MLX5_ADDR_OF(query_flow_counter_out, b->out, +flow_statistics[index]); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index fc4f7b8..158844c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -76,4 +76,16 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, u64 *packets, u64 *bytes); + +struct mlx5_cmd_fc_bulk; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num); +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u16 id, + u64 *packets, u64 *bytes); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index aaf8fd1..c2877e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -90,16 +90,66 @@ static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) rb_insert_color(>node, root); } +static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u16 last_id) +{ + struct mlx5_cmd_fc_bu
[PATCH net-next V2 05/10] net/mlx5: E-Switch, Add API to configure rules for the offloaded mode
From: Or Gerlitz <ogerl...@mellanox.com> This allows for upper levels in the driver, e.g the TC offload code to add e-switch offloaded steering rules. The caller provides the rule spec for matching, action, source and destination vports. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 6 +++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 43 ++ 2 files changed, 49 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 035e536..c0b0560 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -222,6 +222,12 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); +struct mlx5_flow_spec; + +struct mlx5_flow_rule * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + u32 action, u32 src_vport, u32 dst_vport); struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 27122c0..a357e8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -43,6 +43,49 @@ enum { FDB_SLOW_PATH }; +struct mlx5_flow_rule * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + u32 action, u32 src_vport, u32 dst_vport) +{ + struct mlx5_flow_destination dest = { 0 }; + struct mlx5_fc *counter = NULL; + struct mlx5_flow_rule *rule; + void *misc; + + if (esw->mode != SRIOV_OFFLOADS) + return ERR_PTR(-EOPNOTSUPP); + + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = dst_vport; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) + return ERR_CAST(counter); + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter = counter; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, src_vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS; + + rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb, + spec, action, 0, ); + + if (IS_ERR(rule)) + mlx5_fc_destroy(esw->dev, counter); + + return rule; +} + static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { -- 2.8.0
[PATCH net-next V2 03/10] net/mlx5e: Offload TC flow counters only when supported
From: Or Gerlitz <ogerl...@mellanox.com> Currenly, the code that programs the flow actions into the firmware doesn't check if was actually asked to offload the statistics, fix that. Fixes: aad7e08d39bd ('net/mlx5e: Hardware offloaded flower filter statistics support') Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3261e8b..cd58fc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -62,7 +62,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fs.vlan.ft.t; - } else { + } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); -- 2.8.0
[PATCH net-next V2 07/10] net/switchdev: Export the same parent ID service function
From: Or Gerlitz <ogerl...@mellanox.com> This helper serves to know if two switchdev port netdevices belong to the same HW ASIC, e.g to figure out if forwarding offload is possible between them. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- include/net/switchdev.h | 8 net/switchdev/switchdev.c | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 985619a..9023e3e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -227,6 +227,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); +bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b); #else static inline void switchdev_deferred_process(void) @@ -351,6 +353,12 @@ static inline void switchdev_port_fwd_mark_set(struct net_device *dev, { } +static inline bool switchdev_port_same_parent_id(struct net_device *a, +struct net_device *b) +{ + return false; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 59658b2..a5fc9dd 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1286,8 +1286,8 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi) } EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); -static bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b) +bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) { struct switchdev_attr a_attr = { .orig_dev = a, @@ -1323,6 +1323,7 @@ static u32 switchdev_port_fwd_mark_get(struct net_device *dev, return dev->ifindex; } +EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, u32 old_mark, u32 *reset_mark) -- 2.8.0
[PATCH net-next V2 08/10] net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads
From: Or Gerlitz <ogerl...@mellanox.com> Add the setup code that parses the TC actions needed to support offloading drop and mirred/redirect for SRIOV e-switch. We can redirect between two devices if they belong to the same HW switch, compare the switchdev HW ID attribute to enforce that. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 53 + 1 file changed, 53 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 57b76f7..9a66441 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -37,8 +37,11 @@ #include #include #include +#include +#include #include "en.h" #include "en_tc.h" +#include "eswitch.h" struct mlx5e_tc_flow { struct rhash_head node; @@ -339,6 +342,56 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } +static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + u32 *action, u32 *dest_vport) +{ + const struct tc_action *a; + + if (tc_no_actions(exts)) + return -EINVAL; + + *action = 0; + + tc_for_each_action(a, exts) { + /* Only support a single action per rule */ + if (*action) + return -EINVAL; + + if (is_tcf_gact_shot(a)) { + *action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + continue; + } + + if (is_tcf_mirred_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); + struct net_device *out_dev; + struct mlx5e_priv *out_priv; + struct mlx5_eswitch_rep *out_rep; + + out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); + + if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) { + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EINVAL; + } + + out_priv = netdev_priv(out_dev); + out_rep = out_priv->ppriv; + if (out_rep->vport == 0) + *dest_vport = FDB_UPLINK_VPORT; + else + *dest_vport = out_rep->vport; + *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + continue; + } + + return -EINVAL; + } + return 0; +} + int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { -- 2.8.0
[PATCH net-next V2 04/10] net/mlx5: E-Switch, Use two priorities for SRIOV offloads mode
From: Or Gerlitz <ogerl...@mellanox.com> In the offloads mode, some slow path rules are added by the driver (e.g send-to-vport), while offloaded rules are to be added from upper layers. The slow path rules have lower priority and we don't want matching on offloaded rules to suffer from extra steering hops related to the slow path rules. We use two priorities, one for offloaded rules (fast path), and one for the control rules (slow path). To allow for that, we enable two priorities for the FDB namespace in the FS core code. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 34 +- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 22 +- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 7b45e6a..035e536 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -145,6 +145,7 @@ struct mlx5_eswitch_fdb { } legacy; struct offloads_fdb { + struct mlx5_flow_table *fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_rule *miss_rule; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1842dfb..27122c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,6 +38,11 @@ #include "mlx5_core.h" #include "eswitch.h" +enum { + FDB_FAST_PATH = 0, + FDB_SLOW_PATH +}; + static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { @@ -149,7 +154,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = 0; - flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec, + flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, ); if (IS_ERR(flow_rule)) { @@ -165,6 +170,8 @@ out: } #define MAX_PF_SQ 256 +#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */ +#define ESW_OFFLOADS_NUM_GROUPS 4 static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) { @@ -190,15 +197,25 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - table_size = nvports + MAX_PF_SQ + 1; - fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); + fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, + ESW_OFFLOADS_NUM_ENTRIES, + ESW_OFFLOADS_NUM_GROUPS, 0); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create FDB Table err %d\n", err); - goto fdb_err; + esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); + goto fast_fdb_err; } esw->fdb_table.fdb = fdb; + table_size = nvports + MAX_PF_SQ + 1; + fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); + goto slow_fdb_err; + } + esw->fdb_table.offloads.fdb = fdb; + /* create send-to-vport group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -247,8 +264,10 @@ miss_rule_err: miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: - mlx5_destroy_flow_table(fdb); -fdb_err: + mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); +slow_fdb_err: + mlx5_destroy_flow_table(esw->fdb_table.fdb); +fast_fdb_err: ns_err: kvfree(flow_group_in); return err; @@ -264,6 +283,7 @@ static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); mlx5_dest
[PATCH net-next V2 10/10] net/mlx5e: Add TC offload support for the VF representors netdevice
From: Or Gerlitz <ogerl...@mellanox.com> The VF representors support only TC filter/action offloads (not mqprio) and this is enabled for them by default. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 35 +++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index fdaf2fa..1c7d8b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "eswitch.h" #include "en.h" @@ -222,6 +223,29 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, return 0; } +static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, + __be16 proto, struct tc_to_netdev *tc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -EOPNOTSUPP; + + switch (tc->type) { + case TC_SETUP_CLSFLOWER: + switch (tc->cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, proto, tc->cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, tc->cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, tc->cls_flower); + } + default: + return -EOPNOTSUPP; + } +} + static const struct switchdev_ops mlx5e_rep_switchdev_ops = { .switchdev_port_attr_get= mlx5e_attr_get, }; @@ -231,6 +255,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_stop= mlx5e_close, .ndo_start_xmit = mlx5e_xmit, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_setup_tc= mlx5e_rep_ndo_setup_tc, .ndo_get_stats64 = mlx5e_get_stats, }; @@ -284,7 +309,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->switchdev_ops = _rep_switchdev_ops; #endif - netdev->features |= NETIF_F_VLAN_CHALLENGED; + netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC; + netdev->hw_features |= NETIF_F_HW_TC; eth_hw_addr_random(netdev); } @@ -328,8 +354,14 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) } rep->vport_rx_rule = flow_rule; + err = mlx5e_tc_init(priv); + if (err) + goto err_del_flow_rule; + return 0; +err_del_flow_rule: + mlx5_del_flow_rule(rep->vport_rx_rule); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_direct_rqts: @@ -343,6 +375,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) struct mlx5_eswitch_rep *rep = priv->ppriv; int i; + mlx5e_tc_cleanup(priv); mlx5_del_flow_rule(rep->vport_rx_rule); mlx5e_destroy_direct_tirs(priv); for (i = 0; i < priv->params.num_channels; i++) -- 2.8.0
[PATCH net-next V2 09/10] net/mlx5e: Add TC HW support for FDB (SRIOV e-switch) offloads
From: Or Gerlitz <ogerl...@mellanox.com> Enhance the TC offload code such that when the eswitch exists and it's mode being SRIOV offloads, we do TC actions parsing and setup targeted for eswitch. Next, we add the offloaded flow to the HW e-switch (fdb). Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 35 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 5ef02f0..fdaf2fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -36,6 +36,7 @@ #include "eswitch.h" #include "en.h" +#include "en_tc.h" static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -201,6 +202,10 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, if (test_bit(MLX5E_STATE_OPENED, >state)) mlx5e_remove_sqs_fwd_rules(priv); + + /* clean (and re-init) existing uplink offloaded TC rules */ + mlx5e_tc_cleanup(priv); + mlx5e_tc_init(priv); } static int mlx5e_rep_get_phys_port_name(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9a66441..0f19b01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -112,6 +112,22 @@ err_create_ft: return rule; } +static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 dst_vport) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + u32 src_vport; + + if (rep->vport) /* set source vport for the flow */ + src_vport = rep->vport; + else + src_vport = FDB_UPLINK_VPORT; + + return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); +} + static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5_flow_rule *rule) { @@ -397,11 +413,11 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, { struct mlx5e_tc_table *tc = >fs.tc; int err = 0; - u32 flow_tag; - u32 action; + u32 flow_tag, action, dest_vport = 0; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; flow = rhashtable_lookup_fast(>ht, >cookie, tc->ht_params); @@ -422,11 +438,18 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_free; - err = parse_tc_nic_actions(priv, f->exts, , _tag); - if (err < 0) - goto err_free; + if (esw && esw->mode == SRIOV_OFFLOADS) { + err = parse_tc_fdb_actions(priv, f->exts, , _vport); + if (err < 0) + goto err_free; + flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport); + } else { + err = parse_tc_nic_actions(priv, f->exts, , _tag); + if (err < 0) + goto err_free; + flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); + } - flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); goto err_free; -- 2.8.0
[PATCH net-next V2 06/10] net/mlx5e: Adjustments in the TC offload code towards reuse for SRIOV
From: Or Gerlitz <ogerl...@mellanox.com> Towards reusing the TC offloads code for an SRIOV use-case, change some of the helper functions to have _nic in their names so it's clear what's NIC unique and what's general. Also group together the NIC related helpers so we can easily branch per the use-case in downstream patch. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 32 - 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cd58fc8..57b76f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -49,9 +49,9 @@ struct mlx5e_tc_flow { #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 -static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - u32 action, u32 flow_tag) +static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 flow_tag) { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest = { 0 }; @@ -120,7 +120,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv)) { + if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } @@ -295,8 +295,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec return 0; } -static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *flow_tag) +static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + u32 *action, u32 *flow_tag) { const struct tc_action *a; @@ -369,28 +369,28 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_free; - err = parse_tc_actions(priv, f->exts, , _tag); + err = parse_tc_nic_actions(priv, f->exts, , _tag); if (err < 0) goto err_free; - err = rhashtable_insert_fast(>ht, >node, -tc->ht_params); - if (err) - goto err_free; - - flow->rule = mlx5e_tc_add_flow(priv, spec, action, flow_tag); + flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_hash_del; + goto err_free; } + err = rhashtable_insert_fast(>ht, >node, +tc->ht_params); + if (err) + goto err_del_rule; + if (old) mlx5e_tc_del_flow(priv, old); goto out; -err_hash_del: - rhashtable_remove_fast(>ht, >node, tc->ht_params); +err_del_rule: + mlx5_del_flow_rule(flow->rule); err_free: if (!old) -- 2.8.0
[PATCH net-next V2 00/10] Mellanox 100G mlx5 Bulk flow statistics and SRIOV TC offloads
Hi Dave, This series from Amir and Or deals with two enhancements for the mlx5 TC offloads. The 1st two patches add bulk reading of flow counters. Few bulk counter queries are used instead of issuing thousands firmware commands per second to get statistics of all flows set to HW. The next patches add TC based SRIOV offloading to mlx5, as a follow up for the e-switch offloads mode and the VF representors. When the e-switch is set to the (new) "offloads" mode, we can now offload TC/flower drop and forward rules, the forward action we offload is TC mirred/redirect. The above is done by the VF representor netdevices exporting the setup_tc ndo where from there we're re-using and enhancing the existing mlx5 TC offloads sub-module which now works for both the NIC and the SRIOV cases. The series is applied on top b38a75d2d324 ('mlxsw: core: Trace EMAD messages') and it has no merge issues with the on-going net submission ('mlx5 tx timeout watchdog fixes') V2: - Fixed compilation warning. Thanks, Saeed. Amir Vadai (2): net/mlx5: Store counters in rbtree instead of list net/mlx5: Introduce bulk reading of flow counters Or Gerlitz (8): net/mlx5e: Offload TC flow counters only when supported net/mlx5: E-Switch, Use two priorities for SRIOV offloads mode net/mlx5: E-Switch, Add API to configure rules for the offloaded mode net/mlx5e: Adjustments in the TC offload code towards reuse for SRIOV net/switchdev: Export the same parent ID service function net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads net/mlx5e: Add TC HW support for FDB (SRIOV e-switch) offloads net/mlx5e: Add TC offload support for the VF representors netdevice drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 40 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 116 ++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 7 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 77 ++- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 67 ++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 12 ++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 22 ++-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 141 - include/linux/mlx5/driver.h| 2 +- include/linux/mlx5/mlx5_ifc.h | 8 +- include/net/switchdev.h| 8 ++ net/switchdev/switchdev.c | 5 +- 13 files changed, 436 insertions(+), 70 deletions(-) -- 2.8.0
Re: mlx5 PF and VF Vlan Restrictions
On Thu, Jul 14, 2016 at 11:27 AM, domingo montoyawrote: > Hello, > > Can we have VLAN restrictions at both PF and VF level in mlx5 CX4 driver? > > For a particular VF, I would like to restrict VLANs from the PF driver > > For e.g, Let's say there is VF0. I would like to restrict the allowed > vlans from the VF0 to be one of 10,20,30. > > I would like to do this enforcement at the PF driver. > > How do i do this? > in mlx5 driver you will need to configure both egress and ingress ACLs with the allowed vlan list same as we did in VST but without the HW vlan insertion/stripping. i.e don't configure the HW (don't call modify_esw_vport_cvlan with non 0 vlan and qos). today for vst mode we allow one vlan: - VST vlan On VF TX is enforced via modify_esw_vport_cvlan command to insert inforced vlan for all VF TX packets - VST vlan On VF RX is enforced via ingress ACL (Allowed vlan rule). You need - VGT vlan list allowed ACL rules in both ingress and egress ACLs and drop all other traffic. > > So if the VF tries to configure any vlan other than 10,20,30, it should fail. > > Also how to communicate the vlans that are allowed to the vf from the PF. you don't need to communicate them, the vf should initiate VGT Vlans and try to work in best effort manner, if the PF allows it it will work. if not, traffic won't pass. > > > Is this feature already implemented in the mlx5 ? > No, missing kernel/userspace API for PF admin to configure allowed vlan list. > > If not, can anyone please help me understand how to do that. > add new command for iproute to configure allowed vlan list per VF it should be mutually exclusive with VST. add new ndo to configure vlan list for VF in device driver, better if we extend the existing ndo (set_vf_valn). Thanks, Saeed.
[PATCH net V2] net/mlx5e: Fix del vxlan port command buffer memset
memset the command buffers rather than the pointers to them. Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- V2: - Improved log message. drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index 05de772..e25a73ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -72,8 +72,8 @@ static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]; u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)]; - memset(, 0, sizeof(in)); - memset(, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); -- 2.8.0
[PATCH net-next 0/2] Mellanox 100G mlx5 minimum inline header mode
Hi Dave, This small series from Hadar adds the support for minimum inline header mode query in mlx5e NIC driver. Today on TX the driver copies to the HW descriptor only up to L2 header which is the default required mode and sufficient for today's needs. The header in the HW descriptor is used for HW loopback steering decision, without it packets will go directly to the wire with no questions asked. For TX loopback steering according to L2/L3/L4 headers, ConnectX-4 requires to copy the corresponding headers into the send queue(SQ) WQE HW descriptor so it can decide whether to loop it back or to forward to wire. For legacy E-Switch mode only L2 headers copy is required. For advanced steering (E-Switch offloads) more header layers may be required to be copied, the required mode will be advertised by FW to each VF and PF according to the corresponding E-Switch configuration. Thanks, Saeed. Hadar Hen Zion (2): net/mlx5e: Check the minimum inline header mode before xmit net/mlx5e: Query minimum required header copy during xmit drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 49 +-- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 18 + include/linux/mlx5/device.h | 7 include/linux/mlx5/mlx5_ifc.h | 10 +++-- include/linux/mlx5/vport.h| 2 + 7 files changed, 111 insertions(+), 7 deletions(-) -- 2.8.0
[PATCH net-next 2/2] net/mlx5e: Query minimum required header copy during xmit
From: Hadar Hen Zion <had...@mellanox.com> Add support for query the minimum inline mode from the Firmware. It is required for correct TX steering according to L3/L4 packet headers. Each send queue (SQ) has inline mode that defines the minimal required headers that needs to be copied into the SQ WQE. The driver asks the Firmware for the wqe_inline_mode device capability value. In case the device capability defined as "vport context" the driver must check the reported min inline mode from the vport context before creating its SQs. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 +++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 18 + include/linux/mlx5/mlx5_ifc.h | 10 +++--- include/linux/mlx5/vport.h| 2 ++ 5 files changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2c20c7b..1b495ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -129,6 +129,12 @@ static inline int mlx5_max_log_rq_size(int wq_type) } } +enum { + MLX5E_INLINE_MODE_L2, + MLX5E_INLINE_MODE_VPORT_CONTEXT, + MLX5_INLINE_MODE_NOT_REQUIRED, +}; + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -188,6 +194,7 @@ struct mlx5e_params { bool lro_en; u32 lro_wqe_sz; u16 tx_max_inline; + u8 tx_min_inline_mode; u8 rss_hfunc; u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 611ab55..ca7b1e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -56,6 +56,7 @@ struct mlx5e_sq_param { u32sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; u16max_inline; + u8 min_inline_mode; bool icosq; }; @@ -649,6 +650,9 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, } sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; sq->max_inline = param->max_inline; + sq->min_inline_mode = + MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5E_INLINE_MODE_VPORT_CONTEXT ? + param->min_inline_mode : 0; err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu)); if (err) @@ -731,6 +735,7 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn,sq->cq.mcq.cqn); + MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); @@ -1343,6 +1348,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); param->max_inline = priv->params.tx_max_inline; + param->min_inline_mode = priv->params.tx_min_inline_mode; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -2967,6 +2973,23 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; } +static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5E_INLINE_MODE_L2: + *min_inline_mode = MLX5_INLINE_MODE_L2; + break; + case MLX5E_INLINE_MODE_VPORT_CONTEXT: + mlx5_query_nic_vport_min_inline(mdev, + min_inline_mode); + break; + case MLX5_INLINE_MODE_NOT_REQUIRED: + *min_inline_mode = MLX5_INLINE_MODE_NONE; + break; + } +} + static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, const struct mlx5e_profile *profile, @@ -3032,6 +3055,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MOD
[PATCH net-next 1/2] net/mlx5e: Check the minimum inline header mode before xmit
From: Hadar Hen Zion <had...@mellanox.com> Each send queue (SQ) has inline mode that defines the minimal required inline headers in the SQ WQE. Before sending each packet check that the minimum required headers on the WQE are copied. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h| 1 + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 49 +++-- include/linux/mlx5/device.h | 7 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4cbd452..2c20c7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -398,6 +398,7 @@ struct mlx5e_sq { u32sqn; u16bf_buf_size; u16max_inline; + u8 min_inline_mode; u16edge; struct device *pdev; struct mlx5e_tstamp *tstamp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5740b46..e073bf59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -128,6 +128,50 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, return priv->channeltc_to_txq_map[channel_ix][up]; } +static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) +{ +#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) + + return max(skb_network_offset(skb), MLX5E_MIN_INLINE); +} + +static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) +{ + struct flow_keys keys; + + if (skb_transport_header_was_set(skb)) + return skb_transport_offset(skb); + else if (skb_flow_dissect_flow_keys(skb, , 0)) + return keys.control.thoff; + else + return mlx5e_skb_l2_header_offset(skb); +} + +static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, +struct sk_buff *skb) +{ + int hlen; + + switch (mode) { + case MLX5_INLINE_MODE_TCP_UDP: + hlen = eth_get_headlen(skb->data, skb_headlen(skb)); + if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) + hlen += VLAN_HLEN; + return hlen; + case MLX5_INLINE_MODE_IP: + /* When transport header is set to zero, it means no transport +* header. When transport header is set to 0xff's, it means +* transport header wasn't set. +*/ + if (skb_transport_offset(skb)) + return mlx5e_skb_l3_header_offset(skb); + /* fall through */ + case MLX5_INLINE_MODE_L2: + default: + return mlx5e_skb_l2_header_offset(skb); + } +} + static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct sk_buff *skb, bool bf) { @@ -135,8 +179,6 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, * headers and occur before the data gather. * Therefore these headers must be copied into the WQE */ -#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) - if (bf) { u16 ihs = skb_headlen(skb); @@ -146,8 +188,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, if (ihs <= sq->max_inline) return skb_headlen(skb); } - - return max(skb_network_offset(skb), MLX5E_MIN_INLINE); + return mlx5e_calc_min_inline(sq->min_inline_mode, skb); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e0a3ed7..0b6d15c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -129,6 +129,13 @@ __mlx5_mask(typ, fld)) tmp; \ }) +enum mlx5_inline_modes { + MLX5_INLINE_MODE_NONE, + MLX5_INLINE_MODE_L2, + MLX5_INLINE_MODE_IP, + MLX5_INLINE_MODE_TCP_UDP, +}; + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE= 512, -- 2.8.0
Re: [PATCH net V2] net/mlx5e: Fix del vxlan port command buffer memset
On Thu, Jul 21, 2016 at 3:53 AM, Alexei Starovoitov <alexei.starovoi...@gmail.com> wrote: > On Thu, Jul 21, 2016 at 12:39:53AM +0300, Saeed Mahameed wrote: >> memset the command buffers rather than the pointers to them. > > that is still wrong commit log. > This patch makes zero difference to generated code. > '' is the same as 'in' > Who said they are not ? There was a mistake in the original log message and it was fixed here. The patch was made to make the code consistent with other places in the code, and i don't see anything wrong with the log message.
[PATCH net V2] net/bonding: Enforce active-backup policy for IPoIB bonds
From: Mark Bloch <ma...@mellanox.com> When using an IPoIB bond currently only active-backup mode is a valid use case and this commit strengthens it. Since commit 2ab82852a270 ("net/bonding: Enable bonding to enslave netdevices not supporting set_mac_address()") was introduced till 4.7-rc1, IPoIB didn't support the set_mac_address ndo, and hence the fail over mac policy always applied to IPoIB bonds. With the introduction of commit 492a7e67ff83 ("IB/IPoIB: Allow setting the device address"), that doesn't hold and practically IPoIB bonds are broken as of that. To fix it, lets go to fail over mac if the device doesn't support the ndo OR this is IPoIB device. As a by-product, this commit also prevents a stack corruption which occurred when trying to copy 20 bytes (IPoIB) device address to a sockaddr struct that has only 16 bytes of storage. Signed-off-by: Mark Bloch <ma...@mellanox.com> Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- Changes from v0: - Set res to -EOPNOTSUPP before jumping to err_undo_flags. drivers/net/bonding/bond_main.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a2afa3b..4d79819 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1422,7 +1422,16 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) return -EINVAL; } - if (slave_ops->ndo_set_mac_address == NULL) { + if (slave_dev->type == ARPHRD_INFINIBAND && + BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { + netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n", + slave_dev->type); + res = -EOPNOTSUPP; + goto err_undo_flags; + } + + if (!slave_ops->ndo_set_mac_address || + slave_dev->type == ARPHRD_INFINIBAND) { netdev_warn(bond_dev, "The slave device specified does not support setting the MAC address\n"); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && bond->params.fail_over_mac != BOND_FOM_ACTIVE) { -- 2.8.0
Re: [PATCH net-next 2/2] net/mlx5e: Query minimum required header copy during xmit
On Thu, Jul 21, 2016 at 8:15 AM, Alexei Starovoitov <alexei.starovoi...@gmail.com> wrote: > On Thu, Jul 21, 2016 at 01:20:02AM +0300, Saeed Mahameed wrote: >> From: Hadar Hen Zion <had...@mellanox.com> >> >> Add support for query the minimum inline mode from the Firmware. >> It is required for correct TX steering according to L3/L4 packet >> headers. >> >> Each send queue (SQ) has inline mode that defines the minimal required >> headers that needs to be copied into the SQ WQE. >> The driver asks the Firmware for the wqe_inline_mode device capability >> value. In case the device capability defined as "vport context" the >> driver must check the reported min inline mode from the vport context >> before creating its SQs. >> >> Signed-off-by: Hadar Hen Zion <had...@mellanox.com> >> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> > ... >> + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); >> + u32 *out; >> + >> + out = mlx5_vzalloc(outlen); >> + if (!out) >> + return; > > Just discovered this... > outlen is a small constant here, yet you want to try to vmalloc it? > What is the point? > There are 67 places in mlx5 where failed kmalloc is retried with > vmalloc... was that path ever tested? The point is that there are a lot of places in the code that want to allocate huge commands and mlx5_vzalloc is a nice black box that provides the method to allocate such huge chunks of memory. Now sometimes people tend to reuse same pieces of code for code consistency. I don't see any harm from doing that.
Re: [PATCH net-next 2/2] net/mlx5e: Query minimum required header copy during xmit
On Thu, Jul 21, 2016 at 7:15 PM, David Miller <da...@davemloft.net> wrote: > From: Saeed Mahameed <sae...@dev.mellanox.co.il> > Date: Thu, 21 Jul 2016 11:22:32 +0300 > >> On Thu, Jul 21, 2016 at 8:15 AM, Alexei Starovoitov >> <alexei.starovoi...@gmail.com> wrote: >>> On Thu, Jul 21, 2016 at 01:20:02AM +0300, Saeed Mahameed wrote: >>>> From: Hadar Hen Zion <had...@mellanox.com> >>>> >>>> Add support for query the minimum inline mode from the Firmware. >>>> It is required for correct TX steering according to L3/L4 packet >>>> headers. >>>> >>>> Each send queue (SQ) has inline mode that defines the minimal required >>>> headers that needs to be copied into the SQ WQE. >>>> The driver asks the Firmware for the wqe_inline_mode device capability >>>> value. In case the device capability defined as "vport context" the >>>> driver must check the reported min inline mode from the vport context >>>> before creating its SQs. >>>> >>>> Signed-off-by: Hadar Hen Zion <had...@mellanox.com> >>>> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> >>> ... >>>> + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); >>>> + u32 *out; >>>> + >>>> + out = mlx5_vzalloc(outlen); >>>> + if (!out) >>>> + return; >>> >>> Just discovered this... >>> outlen is a small constant here, yet you want to try to vmalloc it? >>> What is the point? >>> There are 67 places in mlx5 where failed kmalloc is retried with >>> vmalloc... was that path ever tested? >> >> The point is that there are a lot of places in the code that want to >> allocate huge commands and mlx5_vzalloc is a nice black box that >> provides the method to allocate such huge chunks of memory. > > If it's a "black box" then don't mention that it uses vmalloc in the > function name. Right, mlx5_zalloc would make a better name, I will change this in a future refactoring patch. but how is this related to this series ? I am already working on a series for next kernel release to improve mlx5 command interface, command layout allocation/filling and execution. I will be glad to listen to any suggestion you have regarding using mlx5_vzalloc for small buffers allocation .. Thanks, Saeed.
[PATCH net] net/bonding: Enforce active-backup policy for IPoIB bonds
From: Mark Bloch <ma...@mellanox.com> When using an IPoIB bond currently only active-backup mode is a valid use case and this commit strengthens it. Since commit 2ab82852a270 ("net/bonding: Enable bonding to enslave netdevices not supporting set_mac_address()") was introduced till 4.7-rc1, IPoIB didn't support the set_mac_address ndo, and hence the fail over mac policy always applied to IPoIB bonds. With the introduction of commit 492a7e67ff83 ("IB/IPoIB: Allow setting the device address"), that doesn't hold and practically IPoIB bonds are broken as of that. To fix it, lets go to fail over mac if the device doesn't support the ndo OR this is IPoIB device. As a by-product, this commit also prevents a stack corruption which occurred when trying to copy 20 bytes (IPoIB) device address to a sockaddr struct that has only 16 bytes of storage. Signed-off-by: Mark Bloch <ma...@mellanox.com> Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/bonding/bond_main.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a2afa3b..ccd4003 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1422,7 +1422,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) return -EINVAL; } - if (slave_ops->ndo_set_mac_address == NULL) { + if (slave_dev->type == ARPHRD_INFINIBAND && + BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { + netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n", + slave_dev->type); + goto err_undo_flags; + } + + if (!slave_ops->ndo_set_mac_address || + slave_dev->type == ARPHRD_INFINIBAND) { netdev_warn(bond_dev, "The slave device specified does not support setting the MAC address\n"); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && bond->params.fail_over_mac != BOND_FOM_ACTIVE) { -- 2.8.0
[PATCH net] net/mlx5e: Fix del vxlan port command
mlx5e_vxlan_core_del_port_cmd cleared the wrong address, the address of the pointer of the buffer rather than clearing the buffer itself. This can lead to a serious stack corruption. Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index 05de772..e25a73ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -72,8 +72,8 @@ static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]; u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)]; - memset(, 0, sizeof(in)); - memset(, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); -- 2.8.0
Re: [PATCH net] net/mlx5e: Fix del vxlan port command
On Wed, Jul 20, 2016 at 7:15 PM, Alexei Starovoitov <alexei.starovoi...@gmail.com> wrote: > On Wed, Jul 20, 2016 at 05:48:48PM +0300, Saeed Mahameed wrote: >> mlx5e_vxlan_core_del_port_cmd cleared the wrong address, the address of >> the pointer of the buffer rather than clearing the buffer itself. >> >> This can lead to a serious stack corruption. > > The change is fine, but commit log is complete nonsense. > Nacking to make sure it's adjusted. > We cannot have such commits in the git history. That was my hunch too :) , will fix this.
[PATCH net 5/7] net/mlx5e: Prevent adding the same vxlan port
From: Matthew Finlay <m...@mellanox.com> Do not allow the same vxlan udp port to be added to the device more than once. Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Matthew Finlay <m...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index f2fd1ef..05de772 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -105,6 +105,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; + if (mlx5e_vxlan_lookup_port(priv, port)) + goto free_work; + if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) goto free_work; -- 2.8.0
[PATCH net 7/7] net/mlx5e: Reorganize ethtool statistics
From: Gal Pressman <g...@mellanox.com> Categorize and reorganize ethtool statistics counters by renaming to "rx_*" and "tx_*" and removing redundant and duplicated counters, this way they are easier to grasp and more user friendly. Signed-off-by: Gal Pressman <g...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 30 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c| 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 228 + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c| 4 +- 5 files changed, 130 insertions(+), 163 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c709d41..e667a87 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -213,42 +213,41 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) /* SW counters */ for (i = 0; i < NUM_SW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name); + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); /* Q counters */ for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name); + strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); /* VPORT counters */ for (i = 0; i < NUM_VPORT_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_stats_desc[i].name); + vport_stats_desc[i].format); /* PPORT counters */ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_802_3_stats_desc[i].name); + pport_802_3_stats_desc[i].format); for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2863_stats_desc[i].name); + pport_2863_stats_desc[i].format); for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2819_stats_desc[i].name); + pport_2819_stats_desc[i].format); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", - prio, - pport_per_prio_traffic_stats_desc[i].name); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_traffic_stats_desc[i].format, prio); } pfc_combined = mlx5e_query_pfc_combined(priv); for_each_set_bit(prio, _combined, NUM_PPORT_PRIO) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", - prio, pport_per_prio_pfc_stats_desc[i].name); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, prio); } } @@ -258,16 +257,15 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) /* per channel counters */ for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i, - rq_stats_desc[j].name); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_stats_desc[j].format, i); for (tc = 0; tc < priv->params.num_tc; tc++) for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_SQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - "tx%d_%s", - priv->channeltc_to_txq_map[i][tc], - sq_stats_desc[j].name); + sq_stats_desc[j].format, + priv->channeltc_to_txq_map[i][tc]); } static void mlx5e_get_strings(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 793d7a1..cb6defd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/c
[PATCH net 6/7] net/mlx5e: Fix number of PFC counters reported to ethtool
From: Gal Pressman <g...@mellanox.com> Number of PFC counters used to count only number of priorities with PFC enabled, but each priority has more than one counter, hence the need to multiply it by the number of PFC counters per priority. Fixes: cf678570d5a1 ('net/mlx5e: Add per priority group to PPort counters') Signed-off-by: Gal Pressman <g...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index fc7dcc0..c709d41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -184,7 +184,9 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) #define MLX5E_NUM_SQ_STATS(priv) \ (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ test_bit(MLX5E_STATE_OPENED, >state)) -#define MLX5E_NUM_PFC_COUNTERS(priv) hweight8(mlx5e_query_pfc_combined(priv)) +#define MLX5E_NUM_PFC_COUNTERS(priv) \ + (hweight8(mlx5e_query_pfc_combined(priv)) * \ +NUM_PPORT_PER_PRIO_PFC_COUNTERS) static int mlx5e_get_sset_count(struct net_device *dev, int sset) { -- 2.8.0
[PATCH net 3/7] net/mlx5e: Change enum to better reflect usage
From: Eli Cohen <e...@mellanox.com> Change MLX5E_STATE_ASYNC_EVENTS_ENABLE to MLX5E_STATE_ASYNC_EVENTS_ENABLED since it represent a state and not an operation. Fixes: acff797cd1874 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Eli Cohen <e...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e8a6c33..baa991a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -401,7 +401,7 @@ enum mlx5e_traffic_types { }; enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLE, + MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f5c8d5d..4383f8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -244,7 +244,7 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, { struct mlx5e_priv *priv = vpriv; - if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, >state)) + if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, >state)) return; switch (event) { @@ -260,12 +260,12 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { - set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, >state); + set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, >state); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { - clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, >state); + clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, >state); synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); } -- 2.8.0
[PATCH net 1/7] net/mlx5: Update command strings
From: Eli Cohen <e...@mellanox.com> Add command string for MODIFY_FLOW_TABLE which is used by the driver. Signed-off-by: Eli Cohen <e...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index dcd2df6..0b49862 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -545,6 +545,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); default: return "unknown command opcode"; } } -- 2.8.0
[PATCH net 4/7] net/mlx5e: Check for BlueFlame capability before allocating SQ uar
From: Gal Pressman <g...@mellanox.com> Previous to this patch mapping was always set to write combining without checking whether BlueFlame is supported in the device. Fixes: 0ba422410bbf ('net/mlx5: Fix global UAR mapping') Signed-off-by: Gal Pressman <g...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4383f8c..793d7a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -580,7 +580,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; - err = mlx5_alloc_map_uar(mdev, >uar, true); + err = mlx5_alloc_map_uar(mdev, >uar, !!MLX5_CAP_GEN(mdev, bf)); if (err) return err; -- 2.8.0
[PATCH net 0/7] Mellanox 100G mlx5 fixes#2 for 4.7-rc
Hi Dave, The following series provides one-liners fixes for mlx5 driver plus one medium patch to reorganize ethtool counters reporting. Highlights: - Added MODIFY_FLOW_TABLE to command strings table - Add ConnectX-5 PCIe 4.0 to list of supported devices - Rename ASYNC_EVENTS enum - Enable BlueFlame only when supported by device - Avoid adding same vxlan port twice - Report the correct number of PFC counters - Reorganize ethtool reported counters and remove duplications Thanks, Saeed. Eli Cohen (2): net/mlx5: Update command strings net/mlx5e: Change enum to better reflect usage Gal Pressman (3): net/mlx5e: Check for BlueFlame capability before allocating SQ uar net/mlx5e: Fix number of PFC counters reported to ethtool net/mlx5e: Reorganize ethtool statistics Majd Dibbiny (1): net/mlx5: Add ConnectX-5 PCIe 4.0 to list of supported devices Matthew Finlay (1): net/mlx5e: Prevent adding the same vxlan port drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 34 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 35 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c| 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 228 + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c| 4 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c| 3 + 9 files changed, 144 insertions(+), 170 deletions(-) -- 2.8.0
[PATCH net 2/7] net/mlx5: Add ConnectX-5 PCIe 4.0 to list of supported devices
From: Majd Dibbiny <m...@mellanox.com> Add the upcoming ConnectX-5 PCIe 4.0 device to the list of supported devices by the mlx5 driver. Signed-off-by: Majd Dibbiny <m...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a19b593..c65f4a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1508,8 +1508,9 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ - { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5 */ + { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ + { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5, PCIe 4.0 */ { 0, } }; -- 2.8.0
Re: mlx5 SRIOV VLAN support
On Tue, Jul 5, 2016 at 10:37 AM, domingo montoyawrote: > Hi, > > Is VLAN supported in SRIOV mode for mlx5? > For VGT mode vlan is supported by default same as bare-metal. For VST you need to use ip tool to configure VST vlan: ip link set eth vf 1 vlan 2 qos 2 note: VGT mode will not work on VF if VST is configured on that VF. > > Can anyone please help me to: > > Enable VLAN guest tagging in mlx5 PF driver? > > Do I need to add any E-switch rules to do the same? > No, Nothing is required for VGT mode, just create the vlan on guest VF machine and work with it! E-Swtich is automatically/dynamically configured. > > My requirement is something like this: > > > mlx5 VF driver<--->mlx5 PF > driver<->Network<---> mlx5 dedicated > driver(non-sriov) > > > > I would like to be able to send ICMP traffic between mlx5 VF driver > and mlx5 dedicated driver. > > My understanding is I should be able to do without adding any rules in > the E-switch on the PF driver as by default all traffic is allowed. > > Is this correct? > correct. > > > I tried but as this didn't work. I added ingress rules to allow VLAN > traffic and also egress rules to the E-switch ACL tables. > How did you do this ? changed the code ? by default (no VST/no spoof check) ACL tables should not be opened. > But still i cannot see any traffic between mlx5 VF driver and mlx5 > dedicated driver. > > I can see that the packets reach the CX4 adapter (mlx5 VF/mlx5 PF) but > they don't go out on the network. > > i tried configuring vlan using ip link add dev enp1s0f0 name > enp1s0f0.100 type vlan id 100 on both the mlx5 vf driver and mlx5 > dedicated driver. > > Strange, should work. Please make sure VST mode is off on the VF. can you share some logs with us: on PF/VF and remote dedicated driver: ip link show ethtool -k Thanks, Saeed.
[PATCH net] bonding: fix enslavement slave link notifications
From: Aviv Heller <av...@mellanox.com> Currently, link notifications are not sent by bond_set_slave_link_state() upon enslavement if the slave is enslaved when up. This happens because slave->link default init value is 0, which is the same as BOND_LINK_UP, resulting in bond_set_slave_link_state() ignoring this transition. This patch sets the default value of slave->link to BOND_LINK_NOCHANGE, assuring it will count as a state transition and thus trigger notification logic. Signed-off-by: Aviv Heller <av...@mellanox.com> Reviewed-by: Jiri Pirko <j...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 941ec99..a2afa3b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1584,6 +1584,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } /* check for initial state */ + new_slave->link = BOND_LINK_NOCHANGE; if (bond->params.miimon) { if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { if (bond->params.updelay) { -- 2.8.0
Re: mlx5 SRIOV VLAN support
On Tue, Jul 5, 2016 at 12:23 PM, domingo montoya <reach.domingomont...@gmail.com> wrote: > Thanks so much for the reply. Please find the logs. > > [...] > # ip link show enp1s0f0 > 3: enp1s0f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state > UP mode DEFAULT qlen 1000 > link/ether e4:1d:2d:c9:c8:9a brd ff:ff:ff:ff:ff:ff > > # ethtool -k enp1s0f0 > Features for enp1s0f0: [...] > rx-vlan-filter: on I was basically looking for this and it looks ok. > > > > > On Tue, Jul 5, 2016 at 1:41 PM, Saeed Mahameed > <sae...@dev.mellanox.co.il> wrote: >> On Tue, Jul 5, 2016 at 10:37 AM, domingo montoya >> <reach.domingomont...@gmail.com> wrote: >>> Hi, >>> >>> Is VLAN supported in SRIOV mode for mlx5? >>> >> >> For VGT mode vlan is supported by default same as bare-metal. >> For VST you need to use ip tool to configure VST vlan: ip link set >> eth vf 1 vlan 2 qos 2 >> >> note: VGT mode will not work on VF if VST is configured on that VF. > > Can you please tell me how can I check if VST mode is enabled on the > VF from the VF driver. > from the VF driver you can't. you need to run ip link show on the PF interface of the SRIOV Hypervisor. > > >> >>> >>> Can anyone please help me to: >>> >>> Enable VLAN guest tagging in mlx5 PF driver? >>> >>> Do I need to add any E-switch rules to do the same? >>> >> >> No, >> Nothing is required for VGT mode, just create the vlan on guest VF >> machine and work with it! >> E-Swtich is automatically/dynamically configured. > > > Is it something like when we create vlan on the guest VF, the VF > driver sends commands to the adapter fw and the adapter fw sends any > notification to the PF driver to configure Eswitch? > yes but for vlan lists we do nothing in PF driver e-switch notification since it e-switch allows all vlans by default. > >> >>> >>> My requirement is something like this: >>> >>> >>> mlx5 VF driver<--->mlx5 PF >>> driver<->Network<---> mlx5 dedicated >>> driver(non-sriov) >>> >>> >>> >>> I would like to be able to send ICMP traffic between mlx5 VF driver >>> and mlx5 dedicated driver. >>> >>> My understanding is I should be able to do without adding any rules in >>> the E-switch on the PF driver as by default all traffic is allowed. >>> >>> Is this correct? >>> >> >> correct. >> >>> >>> >>> I tried but as this didn't work. I added ingress rules to allow VLAN >>> traffic and also egress rules to the E-switch ACL tables. >>> >> >> How did you do this ? changed the code ? by default (no VST/no spoof >> check) ACL tables should not be opened. > > > Yes. I changed the code. Okay. Now I understand that ACL tables are > only for VST or spoofcheck. Thanks! > did you change the code anywhere else ? >> >>> But still i cannot see any traffic between mlx5 VF driver and mlx5 >>> dedicated driver. >>> >>> I can see that the packets reach the CX4 adapter (mlx5 VF/mlx5 PF) but >>> they don't go out on the network. >>> >>> i tried configuring vlan using ip link add dev enp1s0f0 name >>> enp1s0f0.100 type vlan id 100 on both the mlx5 vf driver and mlx5 >>> dedicated driver. >>> >>> >> >> Strange, should work. Please make sure VST mode is off on the VF. >> can you share some logs with us: >> on PF/VF and remote dedicated driver: >> >> ip link show >> ethtool -k > > > Sure. Wanted to check how to turn off the VST mode on the VF or make > sure it is off. I have pasted the logs with this mail(at the > beginning). > I had one more doubt. If we need to disable VST mode on the VF from > the PF driver, using for e.g > > ip link set dev eth0 vf 2 4095, > > > will this command eventually call .ndo_set_vf_vlan and setup the > Eswitch ACL Ingress, Egress rules? > no, to disable you need to run 'ip link set dev eth0 vf 2 vlan 0 qos 0' BTW it should be disabled by default. to see if it disabled just run 'ip link show dev eth0' and see no vlan is configured on VF 2
Re: [PATCH] net/mlx5: Avoid setting unused var when modifying vport node GUID
On Tue, Jul 5, 2016 at 12:17 PM, Or Gerlitz <ogerl...@mellanox.com> wrote: > GCC complains on unused-but-set-variable, clean this up. > > Fixes: 23898c763f4a ('net/mlx5: E-Switch, Modify node guid on vf set MAC') > Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Acked-by: Saeed Mahameed <sae...@mellanox.com>
Re: [PATCH v6 05/12] Add sample for adding simple drop program to link
On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blancowrote: > Add a sample program that only drops packets at the BPF_PROG_TYPE_XDP_RX > hook of a link. With the drop-only program, observed single core rate is > ~20Mpps. > > Other tests were run, for instance without the dropcnt increment or > without reading from the packet header, the packet rate was mostly > unchanged. > > $ perf record -a samples/bpf/xdp1 $( proto 17: 20403027 drops/s > > ./pktgen_sample03_burst_single_flow.sh -i $DEV -d $IP -m $MAC -t 4 > Running... ctrl^C to stop > Device: eth4@0 > Result: OK: 11791017(c11788327+d2689) usec, 59622913 (60byte,0frags) > 5056638pps 2427Mb/sec (2427186240bps) errors: 0 > Device: eth4@1 > Result: OK: 11791012(c11787906+d3106) usec, 60526944 (60byte,0frags) > 5133311pps 2463Mb/sec (2463989280bps) errors: 0 > Device: eth4@2 > Result: OK: 11791019(c11788249+d2769) usec, 59868091 (60byte,0frags) > 5077431pps 2437Mb/sec (2437166880bps) errors: 0 > Device: eth4@3 > Result: OK: 11795039(c11792403+d2636) usec, 59483181 (60byte,0frags) > 5043067pps 2420Mb/sec (2420672160bps) errors: 0 > > perf report --no-children: > 26.05% ksoftirqd/0 [mlx4_en] [k] mlx4_en_process_rx_cq > 17.84% ksoftirqd/0 [mlx4_en] [k] mlx4_en_alloc_frags > 5.52% ksoftirqd/0 [mlx4_en] [k] mlx4_en_free_frag This just proves my point on the previous patch, reusing the rx_desc buffers we are going to drop will save us here ~23% CPU wasted on (alloc_frags & free_frags ) ! and this can improve some benchmarks results where the CPU is the bottleneck.
Re: [PATCH v6 04/12] net/mlx4_en: add support for fast rx drop bpf program
On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blancowrote: > Add support for the BPF_PROG_TYPE_XDP hook in mlx4 driver. > > In tc/socket bpf programs, helpers linearize skb fragments as needed > when the program touchs the packet data. However, in the pursuit of > speed, XDP programs will not be allowed to use these slower functions, > especially if it involves allocating an skb. > > Therefore, disallow MTU settings that would produce a multi-fragment > packet that XDP programs would fail to access. Future enhancements could > be done to increase the allowable MTU. > > Signed-off-by: Brenden Blanco > --- > drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 38 > ++ > drivers/net/ethernet/mellanox/mlx4/en_rx.c | 36 +--- > drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 5 > 3 files changed, 75 insertions(+), 4 deletions(-) > [...] > + /* A bpf program gets first chance to drop the packet. It may > +* read bytes but not past the end of the frag. > +*/ > + if (prog) { > + struct xdp_buff xdp; > + dma_addr_t dma; > + u32 act; > + > + dma = be64_to_cpu(rx_desc->data[0].addr); > + dma_sync_single_for_cpu(priv->ddev, dma, > + priv->frag_info[0].frag_size, > + DMA_FROM_DEVICE); In case of XDP_PASS we will dma_sync again in the normal path, this can be improved by doing the dma_sync as soon as we can and once and for all, regardless of the path the packet is going to take (XDP_DROP/mlx4_en_complete_rx_desc/mlx4_en_rx_skb). > + > + xdp.data = page_address(frags[0].page) + > + frags[0].page_offset; > + xdp.data_end = xdp.data + length; > + > + act = bpf_prog_run_xdp(prog, ); > + switch (act) { > + case XDP_PASS: > + break; > + default: > + bpf_warn_invalid_xdp_action(act); > + case XDP_DROP: > + goto next; The drop action here (goto next) will release the current rx_desc buffers and use new ones to refill, I know that the mlx4 rx scheme will release/allocate new pages once every ~32 packet, but one improvement can really help here especially for XDP_DROP benchmarks is to reuse the current rx_desc buffers in case it is going to be dropped. Considering if mlx4 rx buffer scheme doesn't allow gaps, Maybe this can be added later as future improvement for the whole mlx4 rx data path drop decisions.
Re: [PATCH v6 04/12] net/mlx4_en: add support for fast rx drop bpf program
On Sun, Jul 10, 2016 at 7:05 PM, Brenden Blanco <bbla...@plumgrid.com> wrote: > On Sun, Jul 10, 2016 at 06:25:40PM +0300, Tariq Toukan wrote: >> >> On 09/07/2016 10:58 PM, Saeed Mahameed wrote: >> >On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco <bbla...@plumgrid.com> wrote: >> >>+ /* A bpf program gets first chance to drop the packet. It >> >>may >> >>+* read bytes but not past the end of the frag. >> >>+*/ >> >>+ if (prog) { >> >>+ struct xdp_buff xdp; >> >>+ dma_addr_t dma; >> >>+ u32 act; >> >>+ >> >>+ dma = be64_to_cpu(rx_desc->data[0].addr); >> >>+ dma_sync_single_for_cpu(priv->ddev, dma, >> >>+ >> >>priv->frag_info[0].frag_size, >> >>+ DMA_FROM_DEVICE); >> >In case of XDP_PASS we will dma_sync again in the normal path, this >> >can be improved by doing the dma_sync as soon as we can and once and >> >for all, regardless of the path the packet is going to take >> >(XDP_DROP/mlx4_en_complete_rx_desc/mlx4_en_rx_skb). >> I agree with Saeed, dma_sync is a heavy operation that is now done >> twice for all packets with XDP_PASS. >> We should try our best to avoid performance degradation in the flow >> of unfiltered packets. > Makes sense, do folks here see a way to do this cleanly? yes, we need something like: +static inline void +mlx4_en_sync_dma(struct mlx4_en_priv *priv, +struct mlx4_en_rx_desc *rx_desc, +int length) +{ + dma_addr_t dma; + + /* Sync dma addresses from HW descriptor */ + for (nr = 0; nr < priv->num_frags; nr++) { + struct mlx4_en_frag_info *frag_info = >frag_info[nr]; + + if (length <= frag_info->frag_prefix_size) + break; + + dma = be64_to_cpu(rx_desc->data[nr].addr); + dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size, + DMA_FROM_DEVICE); + } +} @@ -790,6 +808,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud goto next; } + length = be32_to_cpu(cqe->byte_cnt); + length -= ring->fcs_del; + + mlx4_en_sync_dma(priv,rx_desc, length); /* data is available continue processing the packet */ and make sure to remove all explicit dma_sync_single_for_cpu calls.
[PATCH net-next 3/8] net/mlx5: Properly remove all steering objects
From: Maor Gottlieb <ma...@mellanox.com> Instead of explicitly cleaning up the well known parts of the steering tree, we use the generic tree structure to traverse for cleanup. No functional changes. Signed-off-by: Maor Gottlieb <ma...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 120 +++--- 1 file changed, 15 insertions(+), 105 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0f969cb..3e95775 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1663,115 +1663,24 @@ cleanup: return -ENOMEM; } -static void cleanup_single_prio_root_ns(struct mlx5_flow_steering *steering, - struct mlx5_flow_root_namespace *root_ns) +static void clean_tree(struct fs_node *node) { - struct fs_node *prio; - - if (!root_ns) - return; + if (node) { + struct fs_node *iter; + struct fs_node *temp; - if (!list_empty(_ns->ns.node.children)) { - prio = list_first_entry(_ns->ns.node.children, - struct fs_node, -list); - if (tree_remove_node(prio)) - mlx5_core_warn(steering->dev, - "Flow steering priority wasn't destroyed, refcount > 1\n"); + list_for_each_entry_safe(iter, temp, >children, list) + clean_tree(iter); + tree_remove_node(node); } - if (tree_remove_node(_ns->ns.node)) - mlx5_core_warn(steering->dev, - "Flow steering namespace wasn't destroyed, refcount > 1\n"); - root_ns = NULL; -} - -static void destroy_flow_tables(struct fs_prio *prio) -{ - struct mlx5_flow_table *iter; - struct mlx5_flow_table *tmp; - - fs_for_each_ft_safe(iter, tmp, prio) - mlx5_destroy_flow_table(iter); } -static void cleanup_root_ns(struct mlx5_flow_steering *steering) +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) { - struct mlx5_flow_root_namespace *root_ns = steering->root_ns; - struct fs_prio *iter_prio; - - if (!MLX5_CAP_GEN(steering->dev, nic_flow_table)) - return; - if (!root_ns) return; - /* stage 1 */ - fs_for_each_prio(iter_prio, _ns->ns) { - struct fs_node *node; - struct mlx5_flow_namespace *iter_ns; - - fs_for_each_ns_or_ft(node, iter_prio) { - if (node->type == FS_TYPE_FLOW_TABLE) - continue; - fs_get_obj(iter_ns, node); - while (!list_empty(_ns->node.children)) { - struct fs_prio *obj_iter_prio2; - struct fs_node *iter_prio2 = - list_first_entry(_ns->node.children, -struct fs_node, -list); - - fs_get_obj(obj_iter_prio2, iter_prio2); - destroy_flow_tables(obj_iter_prio2); - if (tree_remove_node(iter_prio2)) { - mlx5_core_warn(steering->dev, - "Priority %d wasn't destroyed, refcount > 1\n", - obj_iter_prio2->prio); - return; - } - } - } - } - - /* stage 2 */ - fs_for_each_prio(iter_prio, _ns->ns) { - while (!list_empty(_prio->node.children)) { - struct fs_node *iter_ns = - list_first_entry(_prio->node.children, -struct fs_node, -list); - if (tree_remove_node(iter_ns)) { - mlx5_core_warn(steering->dev, - "Namespace wasn't destroyed, refcount > 1\n"); - return; - } - } - } - - /* stage 3 */ - while (!list_empty(_ns->ns.node.children)) { - struct fs_prio *obj_prio_node; - struct fs_node *prio_node = - list_first_entry(_ns->ns.node.children, -
[PATCH net-next 2/8] net/mlx5: Introduce mlx5_flow_steering structure
From: Maor Gottlieb <ma...@mellanox.com> Instead of having all steering private name spaces and steering module fields flat in mlx5_core_priv, we wrap them in mlx5_flow_steering for better modularity and API exposure. Signed-off-by: Maor Gottlieb <ma...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 134 -- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 8 ++ include/linux/mlx5/driver.h | 6 +- 3 files changed, 84 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 7fcdae1..0f969cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1363,12 +1363,13 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { - struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + struct mlx5_flow_steering *steering = dev->priv.steering; + struct mlx5_flow_root_namespace *root_ns; int prio; struct fs_prio *fs_prio; struct mlx5_flow_namespace *ns; - if (!root_ns) + if (!steering) return NULL; switch (type) { @@ -1380,24 +1381,28 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: - if (dev->priv.fdb_root_ns) - return >priv.fdb_root_ns->ns; + if (steering->fdb_root_ns) + return >fdb_root_ns->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_EGRESS: - if (dev->priv.esw_egress_root_ns) - return >priv.esw_egress_root_ns->ns; + if (steering->esw_egress_root_ns) + return >esw_egress_root_ns->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_INGRESS: - if (dev->priv.esw_ingress_root_ns) - return >priv.esw_ingress_root_ns->ns; + if (steering->esw_ingress_root_ns) + return >esw_ingress_root_ns->ns; else return NULL; default: return NULL; } + root_ns = steering->root_ns; + if (!root_ns) + return NULL; + fs_prio = find_prio(_ns->ns, prio); if (!fs_prio) return NULL; @@ -1483,13 +1488,13 @@ static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) return true; } -static int init_root_tree_recursive(struct mlx5_core_dev *dev, +static int init_root_tree_recursive(struct mlx5_flow_steering *steering, struct init_tree_node *init_node, struct fs_node *fs_parent_node, struct init_tree_node *init_parent_node, int prio) { - int max_ft_level = MLX5_CAP_FLOWTABLE(dev, + int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev, flow_table_properties_nic_receive. max_ft_level); struct mlx5_flow_namespace *fs_ns; @@ -1500,7 +1505,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, if (init_node->type == FS_TYPE_PRIO) { if ((init_node->min_ft_level > max_ft_level) || - !has_required_caps(dev, _node->caps)) + !has_required_caps(steering->dev, _node->caps)) return 0; fs_get_obj(fs_ns, fs_parent_node); @@ -1521,7 +1526,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, } prio = 0; for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(dev, _node->children[i], + err = init_root_tree_recursive(steering, _node->children[i], base, init_node, prio); if (err) return err; @@ -1534,7 +1539,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, return 0; } -static int init_root_tree(struct mlx5_core_dev *dev, +static int init_root_tree(struct mlx5_flow_steering *steering, struct init_tree_node *init_node, struct fs_node *fs_parent_node) { @@ -1544,7 +1549,7 @@ static int init_root_tr
[PATCH net-next 5/8] net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering
From: Maor Gottlieb <ma...@mellanox.com> Add support to add flow steering rules with ethtool of L3/L4 flow types (ip4/tcp4/udp4). Those rules will be in higher priority than l2 flow rules, in order to prefer more specific rules. Mask is not supported for l3/l4 flow types. Signed-off-by: Maor Gottlieb <ma...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 + .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c| 161 - drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 3 files changed, 163 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 357320e..9842594 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -549,9 +549,11 @@ struct mlx5e_ethtool_table { intnum_rules; }; +#define ETHTOOL_NUM_L3_L4_FTS 7 #define ETHTOOL_NUM_L2_FTS 4 struct mlx5e_ethtool_steering { + struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS]; struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; struct list_headrules; int tot_num_rules; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index ee28a9f..830106e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -48,7 +48,8 @@ static void put_flow_table(struct mlx5e_ethtool_table *eth_ft) } } -#define MLX5E_ETHTOOL_L2_PRIO 0 +#define MLX5E_ETHTOOL_L3_L4_PRIO 0 +#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS) #define MLX5E_ETHTOOL_NUM_ENTRIES 64000 #define MLX5E_ETHTOOL_NUM_GROUPS 10 static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, @@ -63,6 +64,17 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, int prio; switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = >fs.ethtool.l3_l4_ft[prio]; + break; + case IP_USER_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = >fs.ethtool.l3_l4_ft[prio]; + break; case ETHER_FLOW: max_tuples = ETHTOOL_NUM_L2_FTS; prio = max_tuples - num_tuples; @@ -103,6 +115,31 @@ static void mask_spec(u8 *mask, u8 *val, size_t size) *((u8 *)val) = *((u8 *)mask) & *((u8 *)val); } +static void set_ips(void *outer_headers_v, void *outer_headers_c, __be32 ip4src_m, + __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v) +{ + if (ip4src_m) { + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + _v, sizeof(ip4src_v)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4src_m)); + } + if (ip4dst_m) { + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + _v, sizeof(ip4dst_v)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4dst_m)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, +ethertype, ETH_P_IP); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, +ethertype, 0x); +} + static int set_flow_attrs(u32 *match_c, u32 *match_v, struct ethtool_rx_flow_spec *fs) { @@ -111,10 +148,66 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + struct ethtool_tcpip4_spec *l4_mask; + struct ethtool_tcpip4_spec *l4_val; + struct ethtool_usrip4_spec *l3_mask; + struct ethtool_usrip4_spec *l3_val; struct ethhdr *eth_val; struct ethhdr *eth_mask; switch (flow_type) { + case TCP_V4_FLOW: + l4_mask = >m_u.tcp_ip4_spec; + l4_val = >h_u.tcp_ip4_spec; + set_ips(o
[PATCH net-next 8/8] net/mlx5e: Expose flow control counters to ethtool
From: Gal Pressman <g...@mellanox.com> Just like per prio counters, the global flow counters are queried from per priority counters register. Global flow control counters are stored in priority 0 PFC counters. Signed-off-by: Gal Pressman <g...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 35 -- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 11 +++ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d652aa9..4a3757e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -139,6 +139,18 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) return err ? 0 : pfc_en_tx | pfc_en_rx; } +static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 rx_pause; + u32 tx_pause; + int err; + + err = mlx5_query_port_pause(mdev, _pause, _pause); + + return err ? false : rx_pause | tx_pause; +} + #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) #define MLX5E_NUM_RQ_STATS(priv) \ (NUM_RQ_STATS * priv->params.num_channels * \ @@ -147,8 +159,8 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ test_bit(MLX5E_STATE_OPENED, >state)) #define MLX5E_NUM_PFC_COUNTERS(priv) \ - (hweight8(mlx5e_query_pfc_combined(priv)) * \ -NUM_PPORT_PER_PRIO_PFC_COUNTERS) + ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ + NUM_PPORT_PER_PRIO_PFC_COUNTERS) static int mlx5e_get_sset_count(struct net_device *dev, int sset) { @@ -210,8 +222,18 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) pfc_combined = mlx5e_query_pfc_combined(priv); for_each_set_bit(prio, _combined, NUM_PPORT_PRIO) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + char pfc_string[ETH_GSTRING_LEN]; + + snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_pfc_stats_desc[i].format, prio); + pport_per_prio_pfc_stats_desc[i].format, pfc_string); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, "global"); } } @@ -306,6 +328,13 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, } } + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = MLX5E_READ_CTR64_BE(>stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, 0); + } + } + if (!test_bit(MLX5E_STATE_OPENED, >state)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index ae29998..7b9d8a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -254,11 +254,12 @@ static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { }; static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { - { "rx_prio%d_pause", PPORT_PER_PRIO_OFF(rx_pause) }, - { "rx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, - { "tx_prio%d_pause", PPORT_PER_PRIO_OFF(tx_pause) }, - { "tx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, - { "rx_prio%d_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, + /* %s is "global" or "prio{i}" */ + { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; struct mlx5e_rq_stats { -- 2.8.0
[PATCH net-next 7/8] net/mlx5e: Expose RDMA VPort counters to ethtool
From: Gal Pressman <g...@mellanox.com> Add the needed descriptors to expose RoCE RDMA counters. Signed-off-by: Gal Pressman <g...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 16 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index fcd490c..ae29998 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -151,6 +151,22 @@ static const struct counter_desc vport_stats_desc[] = { VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, { "tx_vport_broadcast_bytes", VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, + { "rx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(received_ib_unicast.packets) }, + { "rx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(received_ib_unicast.octets) }, + { "tx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, + { "tx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, + { "rx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(received_ib_multicast.packets) }, + { "rx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(received_ib_multicast.octets) }, + { "tx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, + { "tx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, }; #define PPORT_802_3_OFF(c) \ -- 2.8.0
[PATCH net-next 4/8] net/mlx5e: Add ethtool flow steering support
From: Maor Gottlieb <ma...@mellanox.com> Implement etrhtool set_rxnfc callback to support ethtool flow spec direct steering. This patch adds only the support of ether flow type spec. L3/L4 flow specs support will be added in downstream patches. Signed-off-by: Maor Gottlieb <ma...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 20 ++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 21 ++ drivers/net/ethernet/mellanox/mlx5/core/en_fs.c| 3 + .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c| 393 + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 28 +- include/linux/mlx5/fs.h| 1 + 7 files changed, 456 insertions(+), 12 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index a574dea..05cc1ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -8,6 +8,6 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ - en_tc.o en_arfs.o en_rep.o + en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 00643a1..357320e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -544,8 +544,22 @@ enum { MLX5E_ARFS_FT_LEVEL }; +struct mlx5e_ethtool_table { + struct mlx5_flow_table *ft; + intnum_rules; +}; + +#define ETHTOOL_NUM_L2_FTS 4 + +struct mlx5e_ethtool_steering { + struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; + struct list_headrules; + int tot_num_rules; +}; + struct mlx5e_flow_steering { struct mlx5_flow_namespace *ns; + struct mlx5e_ethtool_steering ethtool; struct mlx5e_tc_table tc; struct mlx5e_vlan_table vlan; struct mlx5e_l2_table l2; @@ -701,6 +715,12 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); void mlx5e_init_l2_addr(struct mlx5e_priv *priv); void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); +int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs); +int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, + int location); +void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); +void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7e61ffa..edbb665 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1368,6 +1368,26 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->pflags; } +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx5e_ethtool_flow_replace(priv, >fs); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1387,6 +1407,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, + .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam= mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 2e1e863..1587a9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ether
[PATCH net-next 6/8] net/mlx5e: Add support to get ethtool flow rules
From: Maor Gottlieb <ma...@mellanox.com> Enhance the existing get_rxnfc callback: 1. Get flow rule of specific ID. 2. Get all flow rules. 3. Get number of rules. Signed-off-by: Maor Gottlieb <ma...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 9 ++ .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c| 34 ++ 3 files changed, 47 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9842594..1365cdc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -717,6 +717,10 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); void mlx5e_init_l2_addr(struct mlx5e_priv *priv); void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); +int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + int location); +int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs); int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, struct ethtool_rx_flow_spec *fs); int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index edbb665..d652aa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -931,6 +931,15 @@ static int mlx5e_get_rxnfc(struct net_device *netdev, case ETHTOOL_GRXRINGS: info->data = priv->params.num_channels; break; + case ETHTOOL_GRXCLSRLCNT: + info->rule_cnt = priv->fs.ethtool.tot_num_rules; + break; + case ETHTOOL_GRXCLSRULE: + err = mlx5e_ethtool_get_flow(priv, info, info->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); + break; default: err = -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 830106e..d17c242 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -537,6 +537,40 @@ out: return err; } +int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + list_for_each_entry(eth_rule, >fs.ethtool.rules, list) { + if (eth_rule->flow_spec.location == location) { + info->fs = eth_rule->flow_spec; + return 0; + } + } + + return -ENOENT; +} + +int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + int location = 0; + int idx = 0; + int err = 0; + + while ((!err || err == -ENOENT) && idx < info->rule_cnt) { + err = mlx5e_ethtool_get_flow(priv, info, location); + if (!err) + rule_locs[idx++] = location; + location++; + } + return err; +} + void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { struct mlx5e_ethtool_rule *iter; -- 2.8.0
[PATCH net-next 1/8] net/mlx5: Refactor mlx5_add_flow_rule
From: Maor Gottlieb <ma...@mellanox.com> Reduce the set of arguments passed to mlx5_add_flow_rule by introducing flow_spec structure. Signed-off-by: Maor Gottlieb <ma...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/infiniband/hw/mlx5/main.c | 21 ++--- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 68 ++ drivers/net/ethernet/mellanox/mlx5/core/en_fs.c| 96 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 31 +++ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 100 - .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 55 ++-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 22 ++--- include/linux/mlx5/fs.h| 10 ++- 8 files changed, 171 insertions(+), 232 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b48ad85..dad63f0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1528,21 +1528,18 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_spec *spec; void *ib_flow = flow_attr + 1; - u8 match_criteria_enable = 0; unsigned int spec_index; - u32 *match_c; - u32 *match_v; u32 action; int err = 0; if (!is_valid_attr(flow_attr)) return ERR_PTR(-EINVAL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + spec = mlx5_vzalloc(sizeof(*spec)); handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !match_c || !match_v) { + if (!handler || !spec) { err = -ENOMEM; goto free; } @@ -1550,7 +1547,8 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, INIT_LIST_HEAD(>list); for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(match_c, match_v, ib_flow); + err = parse_flow_attr(spec->match_criteria, + spec->match_value, ib_flow); if (err < 0) goto free; @@ -1558,11 +1556,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, } /* Outer header support only */ - match_criteria_enable = (!outer_header_zero(match_c)) << 0; + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)) + << 0; action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, - match_c, match_v, + handler->rule = mlx5_add_flow_rule(ft, spec, action, MLX5_FS_DEFAULT_FLOW_TAG, dst); @@ -1578,8 +1576,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, free: if (err) kfree(handler); - kfree(match_c); - kfree(match_v); + kvfree(spec); return err ? ERR_PTR(err) : handler; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 10f18d4..a8cb387 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -175,15 +175,12 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, { struct arfs_table *arfs_t = >fs.arfs.arfs_tables[type]; struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; struct mlx5e_tir *tir = priv->indir_tir; - u32 *match_criteria; - u32 *match_value; + struct mlx5_flow_spec *spec; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); err = -ENOMEM; goto out; @@ -208,8 +205,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, goto out; } - arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, match_criteria_enable, - match_cri
[PATCH net-next 0/8] Mellanox 100G mlx5 ethtool ntuple steering
Hi Dave, This series adds Ethernet ethtool ntuple steering 'ethtool -N|U' and exposes two more counter sets to Ethtool statistics, RDMA vport and global flow control statistics. We start from three refactoring patches of the flow steering infrastructure - mlx5_add_flow_rule will now receive mlx5 flow spec to simplify and reduce number of parameters - All low level steering objects are now wrapped in mlx5_flow_steering structure for better encapsulation - Flow steering object will now be removed properly and generically rather than traversing on a well-known steering tree objects Patch#4 adds the infrastructure and the data structures needed for the ethtool ntuple steering, all implemented in a new file 'en_fs_ethtool.c'. Add the support for set_rxnfc ethtool callback to add/remove/replace a flow spec of ethter type L2. Patch#5 adds the support for L3/L4 flow specs and a higher priority in favor for L3/L4 rules when interleaving with L2 rules. Patch#6 adds the support for get_rxnfc ethtool callback. Patch#7,8 adds RDMA vport and global flow control statistics. Applied on top: 8186f6e382d8 ('net-next: mediatek: fix compile error inside mtk_poll_controller()') Thanks, Saeed. Gal Pressman (2): net/mlx5e: Expose RDMA VPort counters to ethtool net/mlx5e: Expose flow control counters to ethtool Maor Gottlieb (6): net/mlx5: Refactor mlx5_add_flow_rule net/mlx5: Introduce mlx5_flow_steering structure net/mlx5: Properly remove all steering objects net/mlx5e: Add ethtool flow steering support net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering net/mlx5e: Add support to get ethtool flow rules drivers/infiniband/hw/mlx5/main.c | 21 +- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 26 + drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 68 +-- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 65 ++- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c| 99 ++-- .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c| 586 + drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 27 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 31 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 100 ++-- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 55 +- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 272 -- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 8 + include/linux/mlx5/driver.h| 6 +- include/linux/mlx5/fs.h| 11 +- 15 files changed, 972 insertions(+), 405 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c -- 2.8.0
[PATCH net] net: poll tx timeout only on active tx queues
From: Mohamad Haj Yahia <moha...@mellanox.com> Change the netdev watchdog to poll only the real active tx queues instead of polling all tx queues. The netdev driver doesn't necessarily have to start/stop all the tx queues including the inactive tx queues. Fixes: fd2ea0a79faa ('net: Use queue aware tests throughout.') Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- net/sched/sch_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f9e0e9c..a10f0ff 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -253,7 +253,7 @@ static void dev_watchdog(unsigned long arg) unsigned int i; unsigned long trans_start; - for (i = 0; i < dev->num_tx_queues; i++) { + for (i = 0; i < dev->real_num_tx_queues; i++) { struct netdev_queue *txq; txq = netdev_get_tx_queue(dev, i); -- 2.8.0
[PATCH net 09/13] net/mlx5e: Handle RQ flush in error cases
From: Daniel Jurgens <dani...@mellanox.com> Add a timeout to avoid an infinite loop waiting for RQ's to flush. This occurs during AER/EEH and will also happen if the device stops posting completions due to internal error or reset, or if moving the RQ to the error state fails. Also cleanup posted receive resources when closing the RQ. Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Daniel Jurgens <dani...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 16 +++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 41 +++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 244aced..b429591 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -191,6 +191,7 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, + MLX5E_RQ_STATE_FLUSH_TIMEOUT, }; struct mlx5e_cq { @@ -220,6 +221,8 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix); + struct mlx5e_dma_info { struct page *page; dma_addr_t addr; @@ -241,6 +244,7 @@ struct mlx5e_rq { struct mlx5e_cqcq; mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_alloc_wqe alloc_wqe; + mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; intix; @@ -592,12 +596,15 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); +void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 38c1286..103feab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -332,6 +332,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); @@ -347,6 +348,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } rq->handle_rx_cqe = mlx5e_handle_rx_cqe; rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : @@ -552,17 +554,25 @@ err_destroy_rq: static void mlx5e_close_rq(struct mlx5e_rq *rq) { + int tout = 0; + int err; + clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, >state); napi_synchronize(>channel->napi); /* prevent mlx5e_post_rx_wqes */ - mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); - while (!mlx5_wq_ll_is_empty(>wq)) - msleep(20); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); + while (!mlx5_wq_ll_is_empty(>wq) && !err && + tout++ < MLX5_EN_QP_FLUSH_MAX_ITER) + msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); + + if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER) + set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, >state); /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ napi_synchronize(>channel->napi); mlx5e_disable_rq(rq); + mlx5e_free_rx_descs(rq); mlx5e_destroy_rq(rq); } diff --git a/driv
[PATCH net 10/13] net/mlx5e: Copy all L2 headers into inline segment
From: Matthew Finlay <m...@mellanox.com> ConnectX4-Lx uses an inline wqe mode that currently defaults to requiring the entire L2 header be included in the wqe. This patch fixes mlx5e_get_inline_hdr_size() to account for all L2 headers (VLAN, QinQ, etc) using skb_network_offset(skb). Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Matthew Finlay <m...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 65e3bce..42a5f06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -123,7 +123,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, * headers and occur before the data gather. * Therefore these headers must be copied into the WQE */ -#define MLX5E_MIN_INLINE ETH_HLEN +#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) if (bf) { u16 ihs = skb_headlen(skb); @@ -135,7 +135,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, return skb_headlen(skb); } - return MLX5E_MIN_INLINE; + return max(skb_network_offset(skb), MLX5E_MIN_INLINE); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, -- 2.8.0
[PATCH net 12/13] net/mlx5e: Validate BW weight values of ETS
From: Rana Shahout <ra...@mellanox.com> Valid weight assigned to ETS TClass values are 1-100 Fixes: 08fb1dacdd76 ('net/mlx5e: Support DCBNL IEEE ETS') Signed-off-by: Rana Shahout <ra...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b429591..943b1bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -145,7 +145,6 @@ struct mlx5e_umr_wqe { #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ -#define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ #endif struct mlx5e_params { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index b2db180..c585349 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; break; case IEEE_8021QAZ_TSA_ETS: - tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC; + tc_tx_bw[i] = ets->tc_tx_bw[i]; break; } } @@ -140,8 +140,12 @@ static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets) /* Validate Bandwidth Sum */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + if (!ets->tc_tx_bw[i]) + return -EINVAL; + bw_sum += ets->tc_tx_bw[i]; + } } if (bw_sum != 0 && bw_sum != 100) -- 2.8.0
[PATCH net 04/13] net/mlx5: Fix wait_vital for VFs and remove fixed sleep
From: Daniel Jurgens <dani...@mellanox.com> The device ID for VFs is in a different location than PFs. This results in the poll always timing out for VFs. There's no good way to read the VF device ID without using the PF's configuration space. Switch to waiting for the health poll to start incrementing. Also remove the 1s sleep at the beginning. fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Daniel Jurgens <dani...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 41 ++ 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c65f4a1..6695893 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1422,46 +1422,31 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) mlx5_pci_err_detected(dev->pdev, 0); } -/* wait for the device to show vital signs. For now we check - * that we can read the device ID and that the health buffer - * shows a non zero value which is different than 0x +/* wait for the device to show vital signs by waiting + * for the health counter to start counting. */ -static void wait_vital(struct pci_dev *pdev) +static int wait_vital(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_health *health = >priv.health; const int niter = 100; + u32 last_count = 0; u32 count; - u16 did; int i; - /* Wait for firmware to be ready after reset */ - msleep(1000); - for (i = 0; i < niter; i++) { - if (pci_read_config_word(pdev, 2, )) { - dev_warn(>dev, "failed reading config word\n"); - break; - } - if (did == pdev->device) { - dev_info(>dev, "device ID correctly read after %d iterations\n", i); - break; - } - msleep(50); - } - if (i == niter) - dev_warn(>dev, "%s-%d: could not read device ID\n", __func__, __LINE__); - for (i = 0; i < niter; i++) { count = ioread32be(health->health_counter); if (count && count != 0x) { - dev_info(>dev, "Counter value 0x%x after %d iterations\n", count, i); - break; + if (last_count && last_count != count) { + dev_info(>dev, "Counter value 0x%x after %d iterations\n", count, i); + return 0; + } + last_count = count; } msleep(50); } - if (i == niter) - dev_warn(>dev, "%s-%d: could not read device ID\n", __func__, __LINE__); + return -ETIMEDOUT; } static void mlx5_pci_resume(struct pci_dev *pdev) @@ -1473,7 +1458,11 @@ static void mlx5_pci_resume(struct pci_dev *pdev) dev_info(>dev, "%s was called\n", __func__); pci_save_state(pdev); - wait_vital(pdev); + err = wait_vital(pdev); + if (err) { + dev_err(>dev, "%s: wait_vital timed out\n", __func__); + return; + } err = mlx5_load_one(dev, priv); if (err) -- 2.8.0
[PATCH net 08/13] net/mlx5e: Implement ndo_tx_timeout callback
From: Daniel Jurgens <dani...@mellanox.com> Add callback to handle TX timeouts. Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Daniel Jurgens <dani...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 46 +++ 2 files changed, 47 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c22d8c8..244aced 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -539,6 +539,7 @@ struct mlx5e_priv { struct workqueue_struct*wq; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; + struct work_struct tx_timeout_work; struct delayed_workupdate_stats_work; struct mlx5_core_dev *mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b94c84b..38c1286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -98,6 +98,26 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(>state_lock); } +static void mlx5e_tx_timeout_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + tx_timeout_work); + int err; + + rtnl_lock(); + mutex_lock(>state_lock); + if (!test_bit(MLX5E_STATE_OPENED, >state)) + goto unlock; + mlx5e_close_locked(priv->netdev); + err = mlx5e_open_locked(priv->netdev); + if (err) + netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + err); +unlock: + mutex_unlock(>state_lock); + rtnl_unlock(); +} + static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = >stats.sw; @@ -2609,6 +2629,29 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } +static void mlx5e_tx_timeout(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + bool sched_work = false; + int i; + + netdev_err(dev, "TX timeout detected\n"); + + for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) { + struct mlx5e_sq *sq = priv->txq_to_sq_map[i]; + + if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) + continue; + sched_work = true; + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state); + netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", + i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); + } + + if (sched_work && test_bit(MLX5E_STATE_OPENED, >state)) + schedule_work(>tx_timeout_work); +} + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open= mlx5e_open, .ndo_stop= mlx5e_close, @@ -2626,6 +2669,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif + .ndo_tx_timeout = mlx5e_tx_timeout, }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -2655,6 +2699,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats= mlx5e_get_vf_stats, + .ndo_tx_timeout = mlx5e_tx_timeout, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2857,6 +2902,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, INIT_WORK(>update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(>set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_WORK(>tx_timeout_work, mlx5e_tx_timeout_work); INIT_DELAYED_WORK(>update_stats_work, mlx5e_update_stats_work); } -- 2.8.0
[PATCH net 00/13] Mellanox 100G mlx5 resiliency and xmit path fixes
Hi Dave, This series provides two set of fixes to the mlx5 driver: - Resiliency fixes for reset flow and internal pci errors - xmit path fixes Please consider queuing those patches for -stable (4.6). Reset flow fixes for core driver: - Add more commands to the list of error simulated commands when pci errors occur - Avoid calling sleeping function by the health poll thread - Fix incorrect page count when in internal error - Fix timeout in wait vital for VFs - Deadlock fix and Timeout handling in commands interface Reset flow and resiliency fixes for mlx5e netdev driver: - Handle RQ flush in error cases - Implement ndo_tx_timeout callback - Timeout if SQ doesn't flush during close - Log link state changes - Validate BW weight values of ETS xmit path fixes: - Fix wrong fallback assumption in select queue callback - Account for all L2 headers when copying headers into inline segment Thanks, Saeed. Daniel Jurgens (5): net/mlx5: Fix incorrect page count when in internal error net/mlx5: Fix wait_vital for VFs and remove fixed sleep net/mlx5e: Timeout if SQ doesn't flush during close net/mlx5e: Implement ndo_tx_timeout callback net/mlx5e: Handle RQ flush in error cases Matthew Finlay (1): net/mlx5e: Copy all L2 headers into inline segment Mohamad Haj Yahia (4): net/mlx5: Fix teardown errors that happen in pci error handler net/mlx5: Avoid calling sleeping function by the health poll thread net/mlx5: Fix potential deadlock in command mode change net/mlx5: Add timeout handle to commands with callback Rana Shahout (2): net/mlx5e: Fix select queue callback net/mlx5e: Validate BW weight values of ETS Shaker Daibes (1): net/mlx5e: Log link state changes drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 129 - drivers/net/ethernet/mellanox/mlx5/core/en.h | 11 +- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 99 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c| 41 +++ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c| 52 - drivers/net/ethernet/mellanox/mlx5/core/health.c | 11 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 41 +++ .../net/ethernet/mellanox/mlx5/core/pagealloc.c| 63 +++--- include/linux/mlx5/driver.h| 1 + 10 files changed, 335 insertions(+), 121 deletions(-) -- 2.8.0
[PATCH net 11/13] net/mlx5e: Fix select queue callback
From: Rana Shahout <ra...@mellanox.com> The default fallback function used by mlx5e select queue can return any TX queues in range [0..dev->num_real_tx_queues). The current implementation assumes that the fallback function returns a number in the range [0.. number of channels). Actually dev->num_real_tx_queues = (number of channels) * dev->num_tc; which is more than the expected range if num_tc is configured and could lead to crashes. To fix this we test if num_tc is not configured we can safely return the fallback suggestion, if not we will reciprocal_scale the fallback result and normalize it to the desired range. Fixes: 08fb1dacdd76 ('net/mlx5e: Support DCBNL IEEE ETS') Signed-off-by: Rana Shahout <ra...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> Reported-by: Doug Ledford <dledf...@redhat.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 - drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 16 ++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 103feab..216fe3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1707,8 +1707,11 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_num_tc(netdev, ntc); + /* Map netdev TCs to offset 0 +* We have our own UP to TXQ mapping for QoS +*/ for (tc = 0; tc < ntc; tc++) - netdev_set_tc_queue(netdev, tc, nch, tc * nch); + netdev_set_tc_queue(netdev, tc, nch, 0); } int mlx5e_open_locked(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 42a5f06..5740b46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -110,8 +110,20 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx5e_priv *priv = netdev_priv(dev); int channel_ix = fallback(dev, skb); - int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ? -skb->vlan_tci >> VLAN_PRIO_SHIFT : 0; + int up = 0; + + if (!netdev_get_num_tc(dev)) + return channel_ix; + + if (skb_vlan_tag_present(skb)) + up = skb->vlan_tci >> VLAN_PRIO_SHIFT; + + /* channel_ix can be larger than num_channels since +* dev->num_real_tx_queues = num_channels * num_tc +*/ + if (channel_ix >= priv->params.num_channels) + channel_ix = reciprocal_scale(channel_ix, + priv->params.num_channels); return priv->channeltc_to_txq_map[channel_ix][up]; } -- 2.8.0
[PATCH net 06/13] net/mlx5: Add timeout handle to commands with callback
From: Mohamad Haj Yahia <moha...@mellanox.com> The current implementation does not handle timeout in case of command with callback request, and this can lead to deadlock if the command doesn't get fw response. Add delayed callback timeout work before posting the command to fw. In case of real fw command completion we will cancel the delayed work. In case of fw command timeout the callback timeout handler will be called and it will simulate fw completion with timeout error. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 38 ++- include/linux/mlx5/driver.h | 1 + 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 74067f5..d6e2a1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -606,11 +606,36 @@ static void dump_command(struct mlx5_core_dev *dev, pr_debug("\n"); } +static u16 msg_to_opcode(struct mlx5_cmd_msg *in) +{ + struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); + + return be16_to_cpu(hdr->opcode); +} + +static void cb_timeout_handler(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct mlx5_cmd_work_ent *ent = container_of(dwork, +struct mlx5_cmd_work_ent, +cb_timeout_work); + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, +cmd); + + ent->ret = -ETIMEDOUT; + mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); struct mlx5_cmd *cmd = ent->cmd; struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); + unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); struct mlx5_cmd_layout *lay; struct semaphore *sem; unsigned long flags; @@ -651,6 +676,9 @@ static void cmd_work_handler(struct work_struct *work) dump_command(dev, ent, 1); ent->ts1 = ktime_get_ns(); + if (ent->callback) + schedule_delayed_work(>cb_timeout_work, cb_timeout); + /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -695,13 +723,6 @@ static const char *deliv_status_to_str(u8 status) } } -static u16 msg_to_opcode(struct mlx5_cmd_msg *in) -{ - struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); - - return be16_to_cpu(hdr->opcode); -} - static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); @@ -765,6 +786,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (!callback) init_completion(>done); + INIT_DELAYED_WORK(>cb_timeout_work, cb_timeout_handler); INIT_WORK(>work, cmd_work_handler); if (page_queue) { cmd_work_handler(>work); @@ -1242,6 +1264,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) struct semaphore *sem; ent = cmd->ent_arr[i]; + if (ent->callback) + cancel_delayed_work(>cb_timeout_work); if (ent->page_queue) sem = >pages_sem; else diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 80776d0..fd72ecf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -629,6 +629,7 @@ struct mlx5_cmd_work_ent { void *uout; int uout_size; mlx5_cmd_cbk_t callback; + struct delayed_work cb_timeout_work; void *context; int idx; struct completion done; -- 2.8.0
[PATCH net 03/13] net/mlx5: Fix incorrect page count when in internal error
From: Daniel Jurgens <dani...@mellanox.com> Change page cleanup flow when in internal error to properly decrement the page counts when reclaiming pages. The prevents timing out waiting for extra pages that were actually cleaned up previously. fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Daniel Jurgens <dani...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- .../net/ethernet/mellanox/mlx5/core/pagealloc.c| 63 +++--- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 905..32dea35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -345,7 +345,6 @@ retry: func_id, npages, err); goto out_4k; } - dev->priv.fw_pages += npages; err = mlx5_cmd_status_to_err(); if (err) { @@ -373,6 +372,33 @@ out_free: return err; } +static int reclaim_pages_cmd(struct mlx5_core_dev *dev, +struct mlx5_manage_pages_inbox *in, int in_size, +struct mlx5_manage_pages_outbox *out, int out_size) +{ + struct fw_page *fwp; + struct rb_node *p; + u32 npages; + u32 i = 0; + + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size, + (u32 *)out, out_size); + + npages = be32_to_cpu(in->num_entries); + + p = rb_first(>priv.page_root); + while (p && i < npages) { + fwp = rb_entry(p, struct fw_page, rb_node); + out->pas[i] = cpu_to_be64(fwp->addr); + p = rb_next(p); + i++; + } + + out->num_entries = cpu_to_be32(i); + return 0; +} + static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, int *nclaimed) { @@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, in.func_id = cpu_to_be16(func_id); in.num_entries = cpu_to_be32(npages); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); - err = mlx5_cmd_exec(dev, , sizeof(in), out, outlen); + err = reclaim_pages_cmd(dev, , sizeof(in), out, outlen); if (err) { - mlx5_core_err(dev, "failed reclaiming pages\n"); - goto out_free; - } - dev->priv.fw_pages -= npages; - - if (out->hdr.status) { - err = mlx5_cmd_status_to_err(>hdr); + mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); goto out_free; } @@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, err = -EINVAL; goto out_free; } - if (nclaimed) - *nclaimed = num_claimed; for (i = 0; i < num_claimed; i++) { addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + + if (nclaimed) + *nclaimed = num_claimed; + dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; @@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) p = rb_first(>priv.page_root); if (p) { fwp = rb_entry(p, struct fw_page, rb_node); - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - free_4k(dev, fwp->addr); - nclaimed = 1; - } else { - err = reclaim_pages(dev, fwp->func_id, - optimal_reclaimed_pages(), - ); - } + err = reclaim_pages(dev, fwp->func_id, + optimal_reclaimed_pages(), + ); + if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err); @@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) } } while (p); + WARN(dev->priv.fw_pages, +"FW pages counter is %d after reclaiming all pages\n", +dev->priv.fw_pages); + WARN(dev->priv.vfs_pages, +"VFs FW pages counter is %d after reclaiming all pages\n", +dev->priv.vfs_pages); + return 0; } -- 2.8.0
[PATCH net 02/13] net/mlx5: Avoid calling sleeping function by the health poll thread
From: Mohamad Haj Yahia <moha...@mellanox.com> In internal error state the health poll thread will eventually call synchronize_irq() (to safely trigger command completions) which might sleep, so we are calling sleeping function from atomic context which is invalid. Here we move trigger_cmd_completions(dev) to enter error state which is the earliest stage in error state handling. This way we won't need to wait for next health poll to trigger command completions and will solve the scheduling while atomic issue. mlx5_enter_error_state can be called from two contexts, protect it with dev->intf_state_lock Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 42d16b9..96a5946 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev) void mlx5_enter_error_state(struct mlx5_core_dev *dev) { + mutex_lock(>intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - return; + goto unlock; mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev)) + if (pci_channel_offline(dev->pdev) || in_fatal(dev)) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + trigger_cmd_completions(dev); + } mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); mlx5_core_err(dev, "end\n"); + +unlock: + mutex_unlock(>intf_state_mutex); } static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) @@ -245,7 +251,6 @@ static void poll_health(unsigned long data) u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - trigger_cmd_completions(dev); mod_timer(>timer, get_next_poll_jiffies()); return; } -- 2.8.0
[PATCH net 05/13] net/mlx5: Fix potential deadlock in command mode change
From: Mohamad Haj Yahia <moha...@mellanox.com> Call command completion handler in case of timeout when working in interrupts mode. Avoid flushing the commands workqueue after acquiring the semaphores to prevent a potential deadlock. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 79 +++ 1 file changed, 33 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index fda43bc..74067f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -710,13 +710,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) if (cmd->mode == CMD_MODE_POLLING) { wait_for_completion(>done); - err = ent->ret; - } else { - if (!wait_for_completion_timeout(>done, timeout)) - err = -ETIMEDOUT; - else - err = 0; + } else if (!wait_for_completion_timeout(>done, timeout)) { + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); } + + err = ent->ret; + if (err == -ETIMEDOUT) { mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), @@ -774,28 +774,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, goto out_free; } - if (!callback) { - err = wait_func(dev, ent); - if (err == -ETIMEDOUT) - goto out; - - ds = ent->ts2 - ent->ts1; - op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); - if (op < ARRAY_SIZE(cmd->stats)) { - stats = >stats[op]; - spin_lock_irq(>lock); - stats->sum += ds; - ++stats->n; - spin_unlock_irq(>lock); - } - mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, - "fw exec time for %s is %lld nsec\n", - mlx5_command_str(op), ds); - *status = ent->status; - free_cmd(ent); - } + if (callback) + goto out; - return err; + err = wait_func(dev, ent); + if (err == -ETIMEDOUT) + goto out_free; + + ds = ent->ts2 - ent->ts1; + op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); + if (op < ARRAY_SIZE(cmd->stats)) { + stats = >stats[op]; + spin_lock_irq(>lock); + stats->sum += ds; + ++stats->n; + spin_unlock_irq(>lock); + } + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, + "fw exec time for %s is %lld nsec\n", + mlx5_command_str(op), ds); + *status = ent->status; out_free: free_cmd(ent); @@ -1185,41 +1183,30 @@ err_dbg: return err; } -void mlx5_cmd_use_events(struct mlx5_core_dev *dev) +static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) { struct mlx5_cmd *cmd = >cmd; int i; for (i = 0; i < cmd->max_reg_cmds; i++) down(>sem); - down(>pages_sem); - flush_workqueue(cmd->wq); - - cmd->mode = CMD_MODE_EVENTS; + cmd->mode = mode; up(>pages_sem); for (i = 0; i < cmd->max_reg_cmds; i++) up(>sem); } -void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { - struct mlx5_cmd *cmd = >cmd; - int i; - - for (i = 0; i < cmd->max_reg_cmds; i++) - down(>sem); - - down(>pages_sem); - - flush_workqueue(cmd->wq); - cmd->mode = CMD_MODE_POLLING; + mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); +} - up(>pages_sem); - for (i = 0; i < cmd->max_reg_cmds; i++) - up(>sem); +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +{ + mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) -- 2.8.0
Re: [PATCH net] net: poll tx timeout only on active tx queues
On Thu, Jun 30, 2016 at 5:28 PM, Eric Dumazet <eric.duma...@gmail.com> wrote: > On Thu, 2016-06-30 at 16:58 +0300, Saeed Mahameed wrote: >> - for (i = 0; i < dev->num_tx_queues; i++) { >> + for (i = 0; i < dev->real_num_tx_queues; i++) { >> struct netdev_queue *txq; >> >> txq = netdev_get_tx_queue(dev, i); > > Strange, why don't you change all others helpers that are using > num_tx_queues ? > which other helpers ? since this function assumes that all tx queues are started and if a non real_txq is stopped for more that timeout period it will start shouting call traces and warnings. > Which driver had a problem with this code ? non yet. currently all the device driver call netif_tx_start_all_queues(dev) on open to W/A this issue. which is strange since only real_num_tx_queues are active.
[PATCH net 07/13] net/mlx5e: Timeout if SQ doesn't flush during close
From: Daniel Jurgens <dani...@mellanox.com> Avoid an infinite loop by timing out waiting for the SQ to flush. Also clean up the TX descriptors if that happens. Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Daniel Jurgens <dani...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 25 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 32 +++ 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index baa991a..c22d8c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -305,6 +305,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, MLX5E_SQ_STATE_BF_ENABLE, + MLX5E_SQ_STATE_TX_TIMEOUT, }; struct mlx5e_ico_wqe_info { @@ -589,6 +590,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +void mlx5e_free_tx_descs(struct mlx5e_sq *sq); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cb6defd..b94c84b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -39,6 +39,13 @@ #include "eswitch.h" #include "vxlan.h" +enum { + MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000, + MLX5_EN_QP_FLUSH_MSLEEP_QUANT = 20, + MLX5_EN_QP_FLUSH_MAX_ITER = MLX5_EN_QP_FLUSH_TIMEOUT_MS / + MLX5_EN_QP_FLUSH_MSLEEP_QUANT, +}; + struct mlx5e_rq_param { u32rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; @@ -782,6 +789,9 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { + int tout = 0; + int err; + if (sq->txq) { clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, >state); /* prevent netif_tx_wake_queue */ @@ -792,15 +802,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) if (mlx5e_sq_has_room_for(sq, 1)) mlx5e_send_nop(sq, true); - mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_ERR); + if (err) + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state); } - while (sq->cc != sq->pc) /* wait till sq is empty */ - msleep(20); + /* wait till sq is empty, unless a TX timeout occurred on this SQ */ + while (sq->cc != sq->pc && + !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state)) { + msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); + if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER) + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state); + } /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */ napi_synchronize(>channel->napi); + mlx5e_free_tx_descs(sq); mlx5e_disable_sq(sq); mlx5e_destroy_sq(sq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5a750b9cd..65e3bce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -341,6 +341,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) return mlx5e_sq_xmit(sq, skb); } +void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct sk_buff *skb; + u16 ci; + int i; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + skb = sq->skb[ci]; + wi = >wqe_info[ci]; + + if (!skb) { /* nop */ + sq->cc++; + continue; + } + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, sq->dma_fifo_cc++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } + + dev_kfree_skb_any(skb); + sq->cc += wi->num_wqebbs; +
[PATCH net 13/13] net/mlx5e: Log link state changes
From: Shaker Daibes <shak...@mellanox.com> Add Link UP/Down prints to kernel log when link state changes Signed-off-by: Shaker Daibes <shak...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 216fe3e..7a0dca2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -81,10 +81,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) port_state = mlx5_query_vport_state(mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); - if (port_state == VPORT_STATE_UP) + if (port_state == VPORT_STATE_UP) { + netdev_info(priv->netdev, "Link up\n"); netif_carrier_on(priv->netdev); - else + } else { + netdev_info(priv->netdev, "Link down\n"); netif_carrier_off(priv->netdev); + } } static void mlx5e_update_carrier_work(struct work_struct *work) -- 2.8.0
[PATCH net 01/13] net/mlx5: Fix teardown errors that happen in pci error handler
From: Mohamad Haj Yahia <moha...@mellanox.com> In case of internal error state we will simulate the commands status through the return value translation function, but we need to simulate all the teardown fw commands as successful so we will not have fw command failure prints. This also fix memory leaks that happen because we skip teardown stages due to failed fw commands. Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 0b49862..fda43bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -295,6 +295,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_FLOW_GROUP: case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -321,8 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: case MLX5_CMD_OP_SQERR2RTS_QP: - case MLX5_CMD_OP_2ERR_QP: - case MLX5_CMD_OP_2RST_QP: case MLX5_CMD_OP_QUERY_QP: case MLX5_CMD_OP_SQD_RTS_QP: case MLX5_CMD_OP_INIT2INIT_QP: @@ -342,7 +346,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: - case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: case MLX5_CMD_OP_SET_ROCE_ADDRESS: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: @@ -390,11 +393,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_RQT: case MLX5_CMD_OP_MODIFY_RQT: case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: case MLX5_CMD_OP_QUERY_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_QUERY_FLOW_GROUP: - case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: -- 2.8.0
[PATCH net-next V2 12/16] net/mlx5e: TIRs management refactoring
From: Hadar Hen Zion <had...@mellanox.com> The current refresh tirs self loopback mechanism, refreshes all the tirs belonging to the same mlx5e instance to prevent self loopback by packets sent over any ring of that instance. This mechanism relies on all the tirs/tises of an instance to be created with the same transport domain number (tdn). Change the driver to refresh all the tirs created under the same tdn regardless of which mlx5e netdev instance they belong to. This behaviour is needed for introducing new mlx5e instances which serve to represent SRIOV VFs. The representors and the PF share vport used for E-Switch management, and we want to avoid NIC level HW loopback between them, e.g when sending broadcast packets. To achieve that, both the representors and the PF NIC will share the tdn. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Reviewed-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 12 +++-- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 14 +++--- .../net/ethernet/mellanox/mlx5/core/en_common.c| 48 +++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c| 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 56 +- 6 files changed, 77 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index da93bf55..ded3f96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -552,9 +552,10 @@ struct mlx5e_flow_steering { struct mlx5e_arfs_tablesarfs; }; -struct mlx5e_direct_tir { +struct mlx5e_tir { u32 tirn; u32 rqtn; + struct list_head list; }; enum { @@ -576,8 +577,8 @@ struct mlx5e_priv { struct mlx5e_channel **channel; u32tisn[MLX5E_MAX_NUM_TC]; u32indir_rqtn; - u32indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_direct_tirdirect_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; @@ -784,7 +785,12 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, #endif u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); +int mlx5e_create_tir(struct mlx5_core_dev *mdev, +struct mlx5e_tir *tir, u32 *in, int inlen); +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 3515e78..10f18d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -93,14 +93,14 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) static int arfs_disable(struct mlx5e_priv *priv) { struct mlx5_flow_destination dest; - u32 *tirn = priv->indir_tirn; + struct mlx5e_tir *tir = priv->indir_tir; int err = 0; int tt; int i; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; for (i = 0; i < ARFS_NUM_TYPES; i++) { - dest.tir_num = tirn[i]; + dest.tir_num = tir[i].tirn; tt = arfs_get_tt(i); /* Modify ttc rules destination to bypass the aRFS tables*/ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], @@ -176,7 +176,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct arfs_table *arfs_t = >fs.arfs.arfs_tables[type]; struct mlx5_flow_destination dest; u8 match_criteria_enable = 0; - u32 *tirn = priv->indir_tirn; + struct mlx5e_tir *tir = priv->indir_tir; u32 *match_criteria; u32 *match_value; int err = 0; @@ -192,16 +192,16 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; switch (type) { case ARFS_IPV4_TCP: - dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn; break; case ARFS_IPV4_UDP: - dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + dest.tir_num = tir[MLX5E_TT_IPV4_U
[PATCH net-next V2 05/16] net/mlx5: Introduce offloads steering namespace
From: Or Gerlitz <ogerl...@mellanox.com> Add a new namespace (MLX5_FLOW_NAMESPACE_OFFLOADS) to be populated with flow steering rules that deal with rules that have have to be executed before the EN NIC steering rules are matched. The namespace is located after the bypass name-space and before the kernel name-space. Therefore, it precedes the HW processing done for rules set for the kernel NIC name-space. Under SRIOV, it would allow us to match on e-switch missed packet and forward them to the relevant VF representor TIR. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Amir Vadai <a...@vadai.me> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 11 ++- include/linux/mlx5/fs.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e912a3d..b040110 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -83,6 +83,11 @@ #define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) + +#define OFFLOADS_MAX_FT 1 +#define OFFLOADS_NUM_PRIOS 1 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) + struct node_caps { size_t arr_sz; long*caps; @@ -98,7 +103,7 @@ static struct init_tree_node { int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 4, + .ar_size = 5, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), @@ -107,6 +112,9 @@ static struct init_tree_node { FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, +ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, ADD_NS(ADD_MULTIPLE_PRIO(1, 1), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, @@ -1369,6 +1377,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, switch (type) { case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_OFFLOADS: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: case MLX5_FLOW_NAMESPACE_ANCHOR: diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4b7a107..6ad1119 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_OFFLOADS, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, -- 2.8.0
[PATCH net-next V2 13/16] net/mlx5e: Mark enabled RQTs instances explicitly
From: Hadar Hen Zion <had...@mellanox.com> In the current driver implementation two types of receive queue tables (RQTs) are in use - direct and indirect. Change the driver to mark each new created RQT (direct or indirect) as "enabled". This behaviour is needed for introducing new mlx5e instances which serve to represent SRIOV VFs. The VF representors will have only one type of RQTs (direct). An "enabled" flag is added to each RQT to allow better handling and code sharing between the representors and the nic netdevices. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Reviewed-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 +-- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 45 +- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ded3f96..1843a4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -552,10 +552,15 @@ struct mlx5e_flow_steering { struct mlx5e_arfs_tablesarfs; }; -struct mlx5e_tir { - u32 tirn; +struct mlx5e_rqt { u32 rqtn; - struct list_head list; + bool enabled; +}; + +struct mlx5e_tir { + u32 tirn; + struct mlx5e_rqt rqt; + struct list_head list; }; enum { @@ -576,7 +581,7 @@ struct mlx5e_priv { struct mlx5e_channel **channel; u32tisn[MLX5E_MAX_NUM_TC]; - u32indir_rqtn; + struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32tx_rates[MLX5E_MAX_NUM_SQS]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 877cf68..7c5c477 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -898,7 +898,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(>state_lock); if (indir) { - u32 rqtn = priv->indir_rqtn; + u32 rqtn = priv->indir_rqt.rqtn; memcpy(priv->params.indirection_rqt, indir, sizeof(priv->params.indirection_rqt)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 808dff4..db890b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1487,7 +1487,8 @@ static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, MLX5_SET(rqtc, rqtc, rq_num[0], rqn); } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) +static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, + int ix, struct mlx5e_rqt *rqt) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; @@ -1510,34 +1511,37 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) else mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - err = mlx5_core_create_rqt(mdev, in, inlen, rqtn); + err = mlx5_core_create_rqt(mdev, in, inlen, >rqtn); + if (!err) + rqt->enabled = true; kvfree(in); return err; } -static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn) +static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) { - mlx5_core_destroy_rqt(priv->mdev, rqtn); + rqt->enabled = false; + mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } static int mlx5e_create_rqts(struct mlx5e_priv *priv) { int nch = mlx5e_get_max_num_channels(priv->mdev); - u32 *rqtn; + struct mlx5e_rqt *rqt; int err; int ix; /* Indirect RQT */ - rqtn = >indir_rqtn; - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn); + rqt = >indir_rqt; + err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); if (err) return err; /* Direct RQTs */ for (ix = 0; ix < nch; ix++) { - rqtn = >direct_tir[ix].rqtn; - err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn); + rqt = >direct_tir[ix].rqt; + err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); if (err) goto err_
[PATCH net-next V2 03/16] net/mlx5: E-Switch, Add miss rule for offloads mode
From: Or Gerlitz <ogerl...@mellanox.com> In the sriov offloads mode, packets that are not matched by any other rule should be sent towards the e-switch manager for further processing. Add such "miss" rule which matches ANY packet as the last rule in the e-switch FDB and programs the HW to send the packet to vport 0 where the e-switch manager runs. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 40 ++ 2 files changed, 41 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2360180..8eed33f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -144,6 +144,7 @@ struct mlx5_eswitch_fdb { struct offloads_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; + struct mlx5_flow_rule *miss_rule; } offloads; }; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c6b28df..e3d81ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,6 +38,39 @@ #include "mlx5_core.h" #include "eswitch.h" +static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v, *match_c; + int err = 0; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = 0; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, 0, match_c, match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, ); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); + goto out; + } + + esw->fdb_table.offloads.miss_rule = flow_rule; +out: + kfree(match_v); + kfree(match_c); + return err; +} + #define MAX_PF_SQ 256 int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) @@ -110,8 +143,14 @@ int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.miss_grp = g; + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + return 0; +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: @@ -128,6 +167,7 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) return; esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); -- 2.8.0
[PATCH net-next V2 16/16] net/mlx5e: Introduce SRIOV VF representors
From: Hadar Hen Zion <had...@mellanox.com> Implement the relevant profile functions to create mlx5e driver instance serving as VF representor. When SRIOV offloads mode is enabled, each VF will have a representor netdevice instance on the host. To do that, we also export set of shared service functions from en_main.c, such that they can be used by both NIC and repsresentors netdevs. The newly created representor netdevice has a basic set of net_device_ops which are the same ndo functions as the NIC netdevice and an ndo of it's own for phys port name. The profiling infrastructure allow sharing code between the NIC and the vport representor even though the representor has only a subset of the NIC functionality. The VF reps and the PF which is used in that mode to represent the uplink, expose switchdev ops. Currently the only op supposed is attr get for the port parent ID which here serves to identify net-devices belonging to the same HW E-Switch. Other than that, no offloading is implemented and hence switching functionality is achieved if one sets SW switching rules, e.g using tc, bridge or ovs. Port phys name (ndo_get_phys_port_name) is implemented to allow exporting to user-space the VF vport number and along with the switchdev port parent id (phys_switch_id) enable a udev base consistent naming scheme: SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="", \ ATTR{phys_port_name}!="", NAME="$PF_NIC$attr{phys_port_name}" where phys_switch_id is exposed by the PF (and VF reps) and $PF_NIC is the name of the PF netdevice. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 28 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 53 ++- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 394 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 20 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 96 - 6 files changed, 574 insertions(+), 19 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9b14dad..a574dea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -8,6 +8,6 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ - en_tc.o en_arfs.o + en_tc.o en_arfs.o en_rep.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f61255c..5912a02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -44,6 +44,7 @@ #include #include #include +#include #include "wq.h" #include "mlx5_core.h" #include "en_stats.h" @@ -816,4 +817,31 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev); +struct mlx5_eswitch_rep; +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, +struct mlx5_eswitch_rep *rep); +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +int mlx5e_close(struct net_device *netdev); +int mlx5e_open(struct net_device *netdev); +void mlx5e_update_stats_work(struct work_struct *work); +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv); +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +struct rtnl_link_stats64 * +mlx5e_get_stats(struct n
[PATCH net-next V2 15/16] net/mlx5: Add Representors registration API
From: Hadar Hen Zion <had...@mellanox.com> Introduce E-Switch registration/unregister representors functions. Those functions are called by the mlx5e driver when the PF NIC is created upon pci probe action regardless of the E-Switch mode (NONE, LEGACY or OFFLOADS). Adding basic E-Switch database that will hold the vport represntors upon creation. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 60 +++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 10 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 12 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 19 +++ 5 files changed, 97 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8d4d2b2..f61255c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -571,7 +571,7 @@ enum { struct mlx5e_profile { void(*init)(struct mlx5_core_dev *mdev, struct net_device *netdev, - const struct mlx5e_profile *profile); + const struct mlx5e_profile *profile, void *ppriv); void(*cleanup)(struct mlx5e_priv *priv); int (*init_rx)(struct mlx5e_priv *priv); void(*cleanup_rx)(struct mlx5e_priv *priv); @@ -618,6 +618,7 @@ struct mlx5e_priv { struct mlx5e_tstamptstamp; u16 q_counter; const struct mlx5e_profile *profile; + void *ppriv; }; enum mlx5e_link_mode { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8ffe68b..bfe3a4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2881,7 +2881,8 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, - const struct mlx5e_profile *profile) + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); u32 link_speed = 0; @@ -2963,6 +2964,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->netdev = netdev; priv->params.num_channels = profile->max_nch(mdev); priv->profile = profile; + priv->ppriv= ppriv; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); @@ -3127,18 +3129,25 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) static void mlx5e_nic_init(struct mlx5_core_dev *mdev, struct net_device *netdev, - const struct mlx5e_profile *profile) + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5e_build_nic_netdev_priv(mdev, netdev, profile); + mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); mlx5e_build_nic_netdev(netdev); mlx5e_vxlan_init(priv); } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + mlx5e_vxlan_cleanup(priv); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5_eswitch_unregister_vport_rep(esw, 0); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) @@ -3230,6 +3239,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; if (mlx5e_vxlan_allowed(mdev)) { rtnl_lock(); @@ -3239,6 +3250,12 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_enable_async_events(priv); queue_work(priv->wq, >set_rx_mode_work); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + rep.vport = 0; + rep.priv_data = priv; + mlx5_eswitch_register_vport_rep(esw, ); + } } static void mlx5e_nic_disable(struct mlx5e_priv *priv) @@ -3262,7 +3279,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { }; static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, -
[PATCH net-next V2 14/16] net/mlx5e: Add support for multiple profiles
From: Hadar Hen Zion <had...@mellanox.com> To allow support in representor netdevices where we create more than one netdevice per NIC, add profiles to the mlx5e driver. The profiling allows for creation of mlx5e instances with different characteristics. Each profile implements its own behavior using set of function pointers defined in struct mlx5e_profile. This is done to allow for avoiding complex per profix branching in the code. Currently only the profile for the conventional NIC is implemented, which is of use when a netdev is created upon pci probe. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 17 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 341 ++ 2 files changed, 240 insertions(+), 118 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 1843a4c..8d4d2b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -568,6 +568,22 @@ enum { MLX5E_NIC_PRIO }; +struct mlx5e_profile { + void(*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile); + void(*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void(*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void(*cleanup_tx)(struct mlx5e_priv *priv); + void(*enable)(struct mlx5e_priv *priv); + void(*disable)(struct mlx5e_priv *priv); + void(*update_stats)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + int max_tc; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq**txq_to_sq_map; @@ -601,6 +617,7 @@ struct mlx5e_priv { struct mlx5e_stats stats; struct mlx5e_tstamptstamp; u16 q_counter; + const struct mlx5e_profile *profile; }; enum mlx5e_link_mode { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index db890b2..8ffe68b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -234,7 +234,7 @@ static void mlx5e_update_stats_work(struct work_struct *work) update_stats_work); mutex_lock(>state_lock); if (test_bit(MLX5E_STATE_OPENED, >state)) { - mlx5e_update_stats(priv); + priv->profile->update_stats(priv); queue_delayed_work(priv->wq, dwork, msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL)); } @@ -1037,7 +1037,7 @@ static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) { int i; - for (i = 0; i < MLX5E_MAX_NUM_TC; i++) + for (i = 0; i < priv->profile->max_tc; i++) priv->channeltc_to_txq_map[ix][i] = ix + i * priv->params.num_channels; } @@ -1525,21 +1525,20 @@ static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } -static int mlx5e_create_rqts(struct mlx5e_priv *priv) +static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) +{ + struct mlx5e_rqt *rqt = >indir_rqt; + + return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); +} + +static int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); struct mlx5e_rqt *rqt; int err; int ix; - /* Indirect RQT */ - rqt = >indir_rqt; - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); - if (err) - return err; - - /* Direct RQTs */ - for (ix = 0; ix < nch; ix++) { + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { rqt = >direct_tir[ix].rqt; err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); if (err) @@ -1552,22 +1551,9 @@ err_destroy_rqts: for (ix--; ix >= 0; ix--) mlx5e_destroy_rqt(priv, >direct_tir[ix].rqt); - mlx5e_destroy_rqt(priv, >indir_rqt); - return err; } -static void mlx5e_destroy_rqts(struct mlx5e_priv *priv) -{ - int nch = mlx5e_get_max_num_channels(priv->mdev); - int i; - - for (i = 0; i < nch; i++) - mlx5e_destroy_rqt(priv, >direct_tir[i].rqt); - - mlx5e_destroy_rqt(priv, >indir_rqt); -} - int mlx5e_redirect_rqt(struct mlx5e_pri
[PATCH net-next V2 10/16] net/mlx5e: Add devlink based SRIOV mode changes
From: Or Gerlitz <ogerl...@mellanox.com> Implement handlers for the devlink commands to get and set the SRIOV E-Switch mode. When turning to the switchdev/offloads mode, we disable the e-switch and enable it again in the new mode, create the NIC offloads table and create VF reps. When turning to legacy mode, we remove the VF reps and the offloads table, and re-initiate the e-switch in it's legacy mode. The actual creation/removal of the VF reps is done in downstream patches. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 121 - 2 files changed, 124 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1fc4cfd..12f509c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -81,8 +81,8 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) -int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports); -void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) @@ -1561,7 +1561,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (mode == SRIOV_LEGACY) err = esw_create_legacy_fdb_table(esw, nvfs + 1); else - err = esw_create_offloads_fdb_table(esw, nvfs + 1); + err = esw_offloads_init(esw, nvfs + 1); if (err) goto abort; @@ -1581,6 +1581,7 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + int nvports; int i; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || @@ -1591,6 +1592,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw->enabled_vports, esw->mode); mc_promisc = esw->mc_promisc; + nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) esw_disable_vport(esw, i); @@ -1600,8 +1602,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (esw->mode == SRIOV_LEGACY) esw_destroy_legacy_fdb_table(esw); - else - esw_destroy_offloads_fdb_table(esw); + else if (esw->mode == SRIOV_OFFLOADS) + esw_offloads_cleanup(esw, nvports); esw->mode = SRIOV_NONE; /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e1727a9..312b6f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -112,7 +112,7 @@ out: #define MAX_PF_SQ 256 -int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -200,7 +200,7 @@ ns_err: return err; } -void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; @@ -329,12 +329,125 @@ out: return flow_rule; } +static int esw_offloads_start(struct mlx5_eswitch *esw) +{ + int err, num_vfs = esw->dev->priv.sriov.num_vfs; + + if (esw->mode != SRIOV_LEGACY) { + esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); + return -EINVAL; + } + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err) + esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + return err; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +{ + int err; + + err = esw_create_offloads_fdb_table(esw, nvports); + if (err) + return err; + + err = esw_create_offloads_table(esw); + if (err) + goto create_ft_err; + + err = esw_create_vport_rx_group(esw); + if (err) + goto create_fg_err; + + return 0; + +create_fg_err: + esw_destroy_offloads_table(esw); + +create_ft_err: + esw_destroy_offloads_f
[PATCH net-next V2 08/16] net/devlink: Add E-Switch mode control
From: Or Gerlitz <ogerl...@mellanox.com> Add the commands to set and show the mode of SRIOV E-Switch, two modes are supported: * legacy: operating in the "old" L2 based mode (DMAC --> VF vport) * switchdev: the E-Switch is referred to as whitebox switch configured using standard tools such as tc, bridge, openvswitch etc. To allow working with the tools, for each VF, a VF representor netdevice is created by the E-Switch manager vendor device driver instance (e.g PF). Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- include/net/devlink.h| 3 ++ include/uapi/linux/devlink.h | 8 net/core/devlink.c | 87 3 files changed, 98 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index 1d45b61..c99ffe8 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -90,6 +90,9 @@ struct devlink_ops { u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); + + int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ba0073b..915bfa7 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -57,6 +57,8 @@ enum devlink_command { DEVLINK_CMD_SB_OCC_SNAPSHOT, DEVLINK_CMD_SB_OCC_MAX_CLEAR, + DEVLINK_CMD_ESWITCH_MODE_GET, + DEVLINK_CMD_ESWITCH_MODE_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, @@ -95,6 +97,11 @@ enum devlink_sb_threshold_type { #define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20 +enum devlink_eswitch_mode { + DEVLINK_ESWITCH_MODE_LEGACY, + DEVLINK_ESWITCH_MODE_SWITCHDEV, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -125,6 +132,7 @@ enum devlink_attr { DEVLINK_ATTR_SB_TC_INDEX, /* u16 */ DEVLINK_ATTR_SB_OCC_CUR,/* u32 */ DEVLINK_ATTR_SB_OCC_MAX,/* u32 */ + DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 933e8d4..b2e592a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1394,6 +1394,78 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, return -EOPNOTSUPP; } +static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, u16 mode) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, _nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + struct sk_buff *msg; + u16 mode; + int err; + + if (!ops || !ops->eswitch_mode_get) + return -EOPNOTSUPP; + + err = ops->eswitch_mode_get(devlink, ); + if (err) + return err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET, + info->snd_portid, info->snd_seq, 0, mode); + + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + u16 mode; + + if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) + return -EINVAL; + + mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + + if (ops && ops->eswitch_mode_set) + return ops->eswitch_mode_set(devlink, mode); + return -EOPNOTSUPP;
[PATCH net-next V2 04/16] net/mlx5: E-Switch, Add API to create send-to-vport rules
From: Or Gerlitz <ogerl...@mellanox.com> Add the API to create send-to-vport e-switch rules of the form packet meta-data :: send-queue-number == $SQN and source-vport == 0 --> $VPORT These rules are to be used for a send-to-vport logic which conceptually bypasses the "normal" steering rules currently present at the e-switch datapath. Such rule should apply only for packets that originate in the e-switch manager vport (0) and are sent for a given SQN which is used by a given VF representor device, and hence the matching logic. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 39 ++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8eed33f..b7fabd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -193,6 +193,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); +struct mlx5_flow_rule * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) @@ -204,5 +206,4 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, #define esw_debug(dev, format, ...)\ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) - #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e3d81ae..8964f71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,6 +38,45 @@ #include "mlx5_core.h" #include "eswitch.h" +struct mlx5_flow_rule * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + int match_header = MLX5_MATCH_MISC_PARAMETERS; + u32 *match_v, *match_c; + void *misc; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); + MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + + misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, match_header, match_c, + match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, ); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_destination dest; -- 2.8.0
[PATCH net-next V2 07/16] net/mlx5: E-Switch, Add API to create vport rx rules
From: Or Gerlitz <ogerl...@mellanox.com> Add the API to create vport rx rules of the form packet meta-data :: vport == $VPORT --> $TIR where the TIR is opened by this VF representor. This logic will by used for packets that didn't match any rule in the e-switch datapath and should be received into the host OS through the netdevice that represents the VF they were sent from. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 4 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 85 ++ 2 files changed, 89 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 32db37a..cf959f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -157,6 +157,7 @@ enum { struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_group *vport_rx_group; }; struct mlx5_eswitch { @@ -201,6 +202,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e895c6f..7aad367 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -243,3 +243,88 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(offloads->ft_offloads); } + +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + struct mlx5_priv *priv = >dev->priv; + u32 *flow_group_in; + void *match_criteria, *misc; + int err = 0; + int nvports = priv->sriov.num_vfs + 2; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + /* create vport rx group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, +MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_group = g; +out: + kfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_group(esw->offloads.vport_rx_group); +} + +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + int match_header = MLX5_MATCH_MISC_PARAMETERS; + u32 *match_v, *match_c; + void *misc; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + esw_warn(esw->dev, "Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tirn; + + flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, match_header, match_c, + match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, ); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); +
[PATCH net-next V2 11/16] net/mlx5e: Create NIC global resources only once
From: Hadar Hen Zion <had...@mellanox.com> To allow creating more than one netdev over the same PCI function, we change the driver such that global NIC resources are created once and later be shared amongst all the mlx5e netdevs running over that port. Move the CQ UAR, PD (pdn), Transport Domain (tdn), MKey resources from being kept in the mlx5e priv part to a new resources structure (mlx5e_resources) placed under the mlx5_core device. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Reviewed-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 6 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +- .../net/ethernet/mellanox/mlx5/core/en_common.c| 112 +++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 124 +++-- include/linux/mlx5/driver.h| 13 +++ 5 files changed, 171 insertions(+), 90 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 96f1826..9b14dad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -6,8 +6,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o rl.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ - en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \ - en_arfs.o + en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ + en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_tc.o en_arfs.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index da885c0..da93bf55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -570,10 +570,6 @@ struct mlx5e_priv { unsigned long state; struct mutex state_lock; /* Protects Interface state */ - struct mlx5_uarcq_uar; - u32pdn; - u32tdn; - struct mlx5_core_mkey mkey; struct mlx5_core_mkey umr_mkey; struct mlx5e_rqdrop_rq; @@ -788,5 +784,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, #endif u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c new file mode 100644 index 000..33b3732 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + *copyright notice, this list of conditions and the following + *disclaimer. + * + * - Redistributions in binary form must reproduce the above + *copyright notice, this list of conditions and the following + *disclaimer in the documentation and/or other materials + *provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* mlx5e global resources should be placed in this file. + * Global resources are common to all the netdevices crated on the same nic. + */ + +static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, +struct mlx5_core_mkey *mkey) +{ + struct m
Re: [PATCH net 08/13] net/mlx5e: Implement ndo_tx_timeout callback
On Thu, Jun 30, 2016 at 6:15 PM, Yuval Mintzwrote: >> Add callback to handle TX timeouts. >> >> Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 >> Ethernet functionality') > > Not that I mind, but does implementing ndo_tx_timeout actually counts as a > fix? Why not if you want it to get backported to -stable and distros as a resiliency fix. Maybe Dave can give some insight..
[PATCH net-next V2 06/16] net/mlx5: E-Switch, Add offloads table
From: Or Gerlitz <ogerl...@mellanox.com> Belongs to the NIC offloads name-space, and to be used as part of the SRIOV offloads logic to steer packets that hit the e-switch miss rule to the TIR of the relevant VF representor. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 5 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 31 ++ 2 files changed, 36 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b7fabd1..32db37a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -155,6 +155,10 @@ enum { SRIOV_OFFLOADS }; +struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads; +}; + struct mlx5_eswitch { struct mlx5_core_dev*dev; struct mlx5_l2_tablel2_table; @@ -169,6 +173,7 @@ struct mlx5_eswitch { */ struct mutexstate_lock; struct esw_mc_addr *mc_promisc; + struct mlx5_esw_offload offloads; int mode; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8964f71..e895c6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -212,3 +212,34 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(esw->fdb_table.fdb); } + +static int esw_create_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft_offloads; + struct mlx5_core_dev *dev = esw->dev; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -ENOMEM; + } + + ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0); + if (IS_ERR(ft_offloads)) { + err = PTR_ERR(ft_offloads); + esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); + return err; + } + + esw->offloads.ft_offloads = ft_offloads; + return 0; +} + +static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = >offloads; + + mlx5_destroy_flow_table(offloads->ft_offloads); +} -- 2.8.0
[PATCH net-next V2 09/16] net/mlx5: Add devlink interface
From: Or Gerlitz <ogerl...@mellanox.com> The devlink interface is initially used to set/get the mode of the SRIOV e-switch. Currently, these are only stubs for get/set, down-stream patch will actually fill them out. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig| 1 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 4 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 + drivers/net/ethernet/mellanox/mlx5/core/main.c | 26 ++ 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 1cf722e..aae4688 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -4,6 +4,7 @@ config MLX5_CORE tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" + depends on MAY_USE_DEVLINK depends on PCI default n ---help--- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index cf959f7..7843f98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -35,6 +35,7 @@ #include #include +#include #include #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -205,6 +206,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7aad367..e1727a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -328,3 +328,13 @@ out: kfree(match_c); return flow_rule; } + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + return -EOPNOTSUPP; +} + +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 08cae34..2abd387 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -51,6 +51,7 @@ #ifdef CONFIG_RFS_ACCEL #include #endif +#include #include "mlx5_core.h" #include "fs_core.h" #ifdef CONFIG_MLX5_CORE_EN @@ -1315,19 +1316,28 @@ struct mlx5_core_event_handler { void *data); }; +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_CORE_EN + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, +#endif +}; static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; + struct devlink *devlink; struct mlx5_priv *priv; int err; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { + devlink = devlink_alloc(_devlink_ops, sizeof(*dev)); + if (!devlink) { dev_err(>dev, "kzalloc failed\n"); return -ENOMEM; } + + dev = devlink_priv(devlink); priv = >priv; priv->pci_dev_data = id->driver_data; @@ -1364,15 +1374,21 @@ static int init_one(struct pci_dev *pdev, goto clean_health; } + err = devlink_register(devlink, >dev); + if (err) + goto clean_load; + return 0; +clean_load: + mlx5_unload_one(dev, priv); clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); clean_dev: pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); return err; } @@ -1380,8 +1396,10 @@ clean_dev: static void remove_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = >priv; + devlink_unregister(devlink); if (mlx5_unload_one(dev, priv)) { dev_err(>pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); @@ -1390,7 +1408,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(p
[PATCH net-next V2 02/16] net/mlx5: E-Switch, Add support for the sriov offloads mode
From: Or Gerlitz <ogerl...@mellanox.com> Unlike the legacy mode, here, forwarding rules are not learned by the driver per events on macs set by VFs/VMs into their vports, but rather should be programmed by higher-level SW entities. Saying that, still, in the offloads mode (SRIOV_OFFLOADS), two flow groups are created by the driver for management (slow path) purposes: The first group will be used for sending packets over e-switch vports from the host OS where the e-switch management code runs, to be received by VFs. The second group will be used by a miss rule which forwards packets toward the e-switch manager. Further logic will trap these packets such that the receiving net-device as seen by the networking stack is the representor of the vport that sent the packet over the e-switch data-path. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 35 +++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 16 +++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 135 + 4 files changed, 168 insertions(+), 20 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index c4f450f..96f1826 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -5,7 +5,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \ en_arfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8068dde..1fc4cfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -40,17 +40,6 @@ #define UPLINK_VPORT 0x -#define MLX5_DEBUG_ESWITCH_MASK BIT(3) - -#define esw_info(dev, format, ...) \ - pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_warn(dev, format, ...) \ - pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_debug(dev, format, ...)\ - mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) - enum { MLX5_ACTION_NONE = 0, MLX5_ACTION_ADD = 1, @@ -92,6 +81,9 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) +int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports); +void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw); + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -578,7 +570,8 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) if (err) goto abort; - if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + /* SRIOV is enabled: Forward UC MAC to vport */ + if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", @@ -1543,7 +1536,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; - int i; + int i, enabled_events; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -1562,18 +1555,19 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); - if (mode != SRIOV_LEGACY) - return -EINVAL; - esw->mode = mode; esw_disable_vport(esw, 0); - err = esw_create_legacy_fdb_table(esw, nvfs + 1); + if (mode == SRIOV_LEGACY) + err = esw_create_legacy_fdb_table(esw, nvfs + 1); + else + err = esw_create_offloads_fdb_table(esw, nvfs + 1); if (err) goto abort; +
[PATCH net-next V2 01/16] net/mlx5: E-Switch, Add operational mode to the SRIOV e-Switch
From: Or Gerlitz <ogerl...@mellanox.com> Define three modes for the SRIOV e-switch operation, none (SRIOV_NONE, none of the VF vports are enabled), legacy (SRIOV_LEGACY, the current mode) and sriov offloads (SRIOV_OFFLOADS). Currently, when in SRIOV, only the legacy mode is supported, where steering rules are of the form: destination mac --> VF vport This patch does not change any functionality. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 51 +-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 19 +++-- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 5 ++- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index aebbd6c..8068dde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -428,7 +428,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); } -static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -479,7 +479,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; } - esw->fdb_table.addr_grp = g; + esw->fdb_table.legacy.addr_grp = g; /* Allmulti group : One rule that forwards any mcast traffic */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -494,7 +494,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); goto out; } - esw->fdb_table.allmulti_grp = g; + esw->fdb_table.legacy.allmulti_grp = g; /* Promiscuous group : * One rule that forward all unmatched traffic from previous groups @@ -511,17 +511,17 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); goto out; } - esw->fdb_table.promisc_grp = g; + esw->fdb_table.legacy.promisc_grp = g; out: if (err) { - if (!IS_ERR_OR_NULL(esw->fdb_table.allmulti_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - esw->fdb_table.allmulti_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + esw->fdb_table.legacy.allmulti_grp = NULL; } - if (!IS_ERR_OR_NULL(esw->fdb_table.addr_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); - esw->fdb_table.addr_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + esw->fdb_table.legacy.addr_grp = NULL; } if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) { mlx5_destroy_flow_table(esw->fdb_table.fdb); @@ -533,20 +533,20 @@ out: return err; } -static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; esw_debug(esw->dev, "Destroy FDB Table\n"); - mlx5_destroy_flow_group(esw->fdb_table.promisc_grp); - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.fdb); esw->fdb_table.fdb = NULL; - esw->fdb_table.addr_grp = NULL; - esw->fdb_table.allmulti_grp = NULL; - esw->fdb_table.promisc_grp = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; } /* E-Switch vport UC/MC lists management */ @@ -1540,7 +1540,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vpo
[PATCH net-next V2 00/16] Mellanox 100G SRIOV E-Switch offload and VF representors
d up with few internal callbacks that should be implemented by a profile instance. The profile for the conventional NIC is implemented, to preserve the existing functionality. The last two patches add e-switch registration API for the VF representors and the implementation of the VF representors netdevice profile. Being an mlx5e instance, the VF representor uses HW send/recv queues, completions queues and such. It currently doesn't support NIC offloads but some of them could be added later on. The VF representor has switchdev ops, where currently the only supported API is the one to the HW ID, which is needed to identify multiple representors belonging to the same e-switch. The architecture + solution (software and firmware) work were done by a team consisting of Ilya Lesokhin, Haggai Eran, Rony Efraim, Tal Anker, Natan Oppenheimer, Saeed Mahameed, Hadar and Or, thanks you all! v1 --> v2 fixes: * removed unneeded variable (patch #3) * removed unused value DEVLINK_ESWITCH_MODE_NONE (patch #8) * changed the devlink mode name from "offloads" to "switchdev" which better describes what are we referring here, using a known concept (patch #8) * correctly refer to devlink e-switch modes (patch #10) * use the correct mlx5e way to define the VF rep statistics (patch #16) Thanks, Or & Saeed. Hadar Hen Zion (6): net/mlx5e: Create NIC global resources only once net/mlx5e: TIRs management refactoring net/mlx5e: Mark enabled RQTs instances explicitly net/mlx5e: Add support for multiple profiles net/mlx5: Add Representors registration API net/mlx5e: Introduce SRIOV VF representors Or Gerlitz (10): net/mlx5: E-Switch, Add operational mode to the SRIOV e-Switch net/mlx5: E-Switch, Add support for the sriov offloads mode net/mlx5: E-Switch, Add miss rule for offloads mode net/mlx5: E-Switch, Add API to create send-to-vport rules net/mlx5: Introduce offloads steering namespace net/mlx5: E-Switch, Add offloads table net/mlx5: E-Switch, Add API to create vport rx rules net/devlink: Add E-Switch mode control net/mlx5: Add devlink interface net/mlx5e: Add devlink based SRIOV mode changes drivers/net/ethernet/mellanox/mlx5/core/Kconfig| 1 + drivers/net/ethernet/mellanox/mlx5/core/Makefile | 8 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 73 ++- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 14 +- .../net/ethernet/mellanox/mlx5/core/en_common.c| 160 ++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c| 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 627 - drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 394 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 90 +-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 78 ++- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 566 +++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 11 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 26 +- drivers/net/ethernet/mellanox/mlx5/core/sriov.c| 5 +- include/linux/mlx5/driver.h| 13 + include/linux/mlx5/fs.h| 1 + include/net/devlink.h | 3 + include/uapi/linux/devlink.h | 8 + net/core/devlink.c | 87 +++ 20 files changed, 1840 insertions(+), 331 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c -- 2.8.0
[PATCH net-next V3 13/16] net/mlx5e: Mark enabled RQTs instances explicitly
From: Hadar Hen Zion <had...@mellanox.com> In the current driver implementation two types of receive queue tables (RQTs) are in use - direct and indirect. Change the driver to mark each new created RQT (direct or indirect) as "enabled". This behaviour is needed for introducing new mlx5e instances which serve to represent SRIOV VFs. The VF representors will have only one type of RQTs (direct). An "enabled" flag is added to each RQT to allow better handling and code sharing between the representors and the nic netdevices. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Reviewed-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 +-- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 45 +- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8dad50c..91c6bbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -552,10 +552,15 @@ struct mlx5e_flow_steering { struct mlx5e_arfs_tablesarfs; }; -struct mlx5e_tir { - u32 tirn; +struct mlx5e_rqt { u32 rqtn; - struct list_head list; + bool enabled; +}; + +struct mlx5e_tir { + u32 tirn; + struct mlx5e_rqt rqt; + struct list_head list; }; enum { @@ -576,7 +581,7 @@ struct mlx5e_priv { struct mlx5e_channel **channel; u32tisn[MLX5E_MAX_NUM_TC]; - u32indir_rqtn; + struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32tx_rates[MLX5E_MAX_NUM_SQS]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 5b88967..7e61ffa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -898,7 +898,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(>state_lock); if (indir) { - u32 rqtn = priv->indir_rqtn; + u32 rqtn = priv->indir_rqt.rqtn; memcpy(priv->params.indirection_rqt, indir, sizeof(priv->params.indirection_rqt)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 30efa8a..7f1f1ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1486,7 +1486,8 @@ static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, MLX5_SET(rqtc, rqtc, rq_num[0], rqn); } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) +static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, + int ix, struct mlx5e_rqt *rqt) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; @@ -1509,34 +1510,37 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) else mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - err = mlx5_core_create_rqt(mdev, in, inlen, rqtn); + err = mlx5_core_create_rqt(mdev, in, inlen, >rqtn); + if (!err) + rqt->enabled = true; kvfree(in); return err; } -static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn) +static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) { - mlx5_core_destroy_rqt(priv->mdev, rqtn); + rqt->enabled = false; + mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } static int mlx5e_create_rqts(struct mlx5e_priv *priv) { int nch = mlx5e_get_max_num_channels(priv->mdev); - u32 *rqtn; + struct mlx5e_rqt *rqt; int err; int ix; /* Indirect RQT */ - rqtn = >indir_rqtn; - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn); + rqt = >indir_rqt; + err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); if (err) return err; /* Direct RQTs */ for (ix = 0; ix < nch; ix++) { - rqtn = >direct_tir[ix].rqtn; - err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn); + rqt = >direct_tir[ix].rqt; + err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); if (err) goto err_
[PATCH net-next V3 12/16] net/mlx5e: TIRs management refactoring
From: Hadar Hen Zion <had...@mellanox.com> The current refresh tirs self loopback mechanism, refreshes all the tirs belonging to the same mlx5e instance to prevent self loopback by packets sent over any ring of that instance. This mechanism relies on all the tirs/tises of an instance to be created with the same transport domain number (tdn). Change the driver to refresh all the tirs created under the same tdn regardless of which mlx5e netdev instance they belong to. This behaviour is needed for introducing new mlx5e instances which serve to represent SRIOV VFs. The representors and the PF share vport used for E-Switch management, and we want to avoid NIC level HW loopback between them, e.g when sending broadcast packets. To achieve that, both the representors and the PF NIC will share the tdn. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Reviewed-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 12 +++-- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 14 +++--- .../net/ethernet/mellanox/mlx5/core/en_common.c| 48 +++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c| 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 56 +- 6 files changed, 77 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3226b92..8dad50c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -552,9 +552,10 @@ struct mlx5e_flow_steering { struct mlx5e_arfs_tablesarfs; }; -struct mlx5e_direct_tir { +struct mlx5e_tir { u32 tirn; u32 rqtn; + struct list_head list; }; enum { @@ -576,8 +577,8 @@ struct mlx5e_priv { struct mlx5e_channel **channel; u32tisn[MLX5E_MAX_NUM_TC]; u32indir_rqtn; - u32indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_direct_tirdirect_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; @@ -784,7 +785,12 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, #endif u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); +int mlx5e_create_tir(struct mlx5_core_dev *mdev, +struct mlx5e_tir *tir, u32 *in, int inlen); +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 3515e78..10f18d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -93,14 +93,14 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) static int arfs_disable(struct mlx5e_priv *priv) { struct mlx5_flow_destination dest; - u32 *tirn = priv->indir_tirn; + struct mlx5e_tir *tir = priv->indir_tir; int err = 0; int tt; int i; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; for (i = 0; i < ARFS_NUM_TYPES; i++) { - dest.tir_num = tirn[i]; + dest.tir_num = tir[i].tirn; tt = arfs_get_tt(i); /* Modify ttc rules destination to bypass the aRFS tables*/ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], @@ -176,7 +176,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct arfs_table *arfs_t = >fs.arfs.arfs_tables[type]; struct mlx5_flow_destination dest; u8 match_criteria_enable = 0; - u32 *tirn = priv->indir_tirn; + struct mlx5e_tir *tir = priv->indir_tir; u32 *match_criteria; u32 *match_value; int err = 0; @@ -192,16 +192,16 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; switch (type) { case ARFS_IPV4_TCP: - dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn; break; case ARFS_IPV4_UDP: - dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + dest.tir_num = tir[MLX5E_TT_IPV4_U
[PATCH net-next V3 15/16] net/mlx5: Add Representors registration API
From: Hadar Hen Zion <had...@mellanox.com> Introduce E-Switch registration/unregister representors functions. Those functions are called by the mlx5e driver when the PF NIC is created upon pci probe action regardless of the E-Switch mode (NONE, LEGACY or OFFLOADS). Adding basic E-Switch database that will hold the vport represntors upon creation. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 60 +++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 10 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 12 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 19 +++ 5 files changed, 97 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index edfc9be..081259a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -571,7 +571,7 @@ enum { struct mlx5e_profile { void(*init)(struct mlx5_core_dev *mdev, struct net_device *netdev, - const struct mlx5e_profile *profile); + const struct mlx5e_profile *profile, void *ppriv); void(*cleanup)(struct mlx5e_priv *priv); int (*init_rx)(struct mlx5e_priv *priv); void(*cleanup_rx)(struct mlx5e_priv *priv); @@ -618,6 +618,7 @@ struct mlx5e_priv { struct mlx5e_tstamptstamp; u16 q_counter; const struct mlx5e_profile *profile; + void *ppriv; }; enum mlx5e_link_mode { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3e22c5e..2c9e458 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2880,7 +2880,8 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, - const struct mlx5e_profile *profile) + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); u32 link_speed = 0; @@ -2962,6 +2963,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->netdev = netdev; priv->params.num_channels = profile->max_nch(mdev); priv->profile = profile; + priv->ppriv= ppriv; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); @@ -3126,18 +3128,25 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) static void mlx5e_nic_init(struct mlx5_core_dev *mdev, struct net_device *netdev, - const struct mlx5e_profile *profile) + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5e_build_nic_netdev_priv(mdev, netdev, profile); + mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); mlx5e_build_nic_netdev(netdev); mlx5e_vxlan_init(priv); } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + mlx5e_vxlan_cleanup(priv); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5_eswitch_unregister_vport_rep(esw, 0); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) @@ -3229,6 +3238,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; if (mlx5e_vxlan_allowed(mdev)) { rtnl_lock(); @@ -3238,6 +3249,12 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_enable_async_events(priv); queue_work(priv->wq, >set_rx_mode_work); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + rep.vport = 0; + rep.priv_data = priv; + mlx5_eswitch_register_vport_rep(esw, ); + } } static void mlx5e_nic_disable(struct mlx5e_priv *priv) @@ -3261,7 +3278,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { }; static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, -
Re: [PATCH net] net: poll tx timeout only on active tx queues
On Fri, Jul 1, 2016 at 8:18 AM, Eric Dumazetwrote: > On Fri, 2016-07-01 at 04:50 +, Yuval Mintz wrote: >> > currently all the device driver call netif_tx_start_all_queues(dev) >> > on open to W/A this issue. which is strange since only >> > real_num_tx_queues are active. >> >> You could also argue that netif_tx_start_all_queues() should >> only enable the real_num_tx_queues. >> [Although that would obviously cause all drivers to reach the >> 'problem' you're currently fixing]. > > Yep. Basically what I pointed out. > > It seems inconsistent to have loops using num_tx_queues, and others > using real_num_tx_queues. > > Instead of 'fixing' one of them, we should take a deeper look, even if > the change looks fine. > > num_tx_queues should be used in code that runs once, like > netdev_lockdep_set_classes(), but other loops should probably use > real_num_tx_queues. > > Anyway all these changes should definitely target net-next, not net > tree. > Thank you Eric and Yuval, Although i slightly disagree, this patch is good as is, even with the inconsistency, which is there due to a bad design. I don't' see why new drivers need to keep copy from old wrong implementations and workarounds. But for the long term, you have a point. We will consider a deeper fix for net-next as you suggested, and drop this temporary fix. Thanks Saeed.
[PATCH net-next V3 04/16] net/mlx5: E-Switch, Add API to create send-to-vport rules
From: Or Gerlitz <ogerl...@mellanox.com> Add the API to create send-to-vport e-switch rules of the form packet meta-data :: send-queue-number == $SQN and source-vport == 0 --> $VPORT These rules are to be used for a send-to-vport logic which conceptually bypasses the "normal" steering rules currently present at the e-switch datapath. Such rule should apply only for packets that originate in the e-switch manager vport (0) and are sent for a given SQN which is used by a given VF representor device, and hence the matching logic. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 39 ++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8eed33f..b7fabd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -193,6 +193,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); +struct mlx5_flow_rule * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) @@ -204,5 +206,4 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, #define esw_debug(dev, format, ...)\ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) - #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e3d81ae..8964f71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,6 +38,45 @@ #include "mlx5_core.h" #include "eswitch.h" +struct mlx5_flow_rule * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + int match_header = MLX5_MATCH_MISC_PARAMETERS; + u32 *match_v, *match_c; + void *misc; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); + MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + + misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, match_header, match_c, + match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, ); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_destination dest; -- 2.8.0
[PATCH net-next V3 01/16] net/mlx5: E-Switch, Add operational mode to the SRIOV e-Switch
From: Or Gerlitz <ogerl...@mellanox.com> Define three modes for the SRIOV e-switch operation, none (SRIOV_NONE, none of the VF vports are enabled), legacy (SRIOV_LEGACY, the current mode) and sriov offloads (SRIOV_OFFLOADS). Currently, when in SRIOV, only the legacy mode is supported, where steering rules are of the form: destination mac --> VF vport This patch does not change any functionality. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 51 +-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 19 +++-- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 5 ++- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index aebbd6c..8068dde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -428,7 +428,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); } -static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -479,7 +479,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; } - esw->fdb_table.addr_grp = g; + esw->fdb_table.legacy.addr_grp = g; /* Allmulti group : One rule that forwards any mcast traffic */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -494,7 +494,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); goto out; } - esw->fdb_table.allmulti_grp = g; + esw->fdb_table.legacy.allmulti_grp = g; /* Promiscuous group : * One rule that forward all unmatched traffic from previous groups @@ -511,17 +511,17 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); goto out; } - esw->fdb_table.promisc_grp = g; + esw->fdb_table.legacy.promisc_grp = g; out: if (err) { - if (!IS_ERR_OR_NULL(esw->fdb_table.allmulti_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - esw->fdb_table.allmulti_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + esw->fdb_table.legacy.allmulti_grp = NULL; } - if (!IS_ERR_OR_NULL(esw->fdb_table.addr_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); - esw->fdb_table.addr_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + esw->fdb_table.legacy.addr_grp = NULL; } if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) { mlx5_destroy_flow_table(esw->fdb_table.fdb); @@ -533,20 +533,20 @@ out: return err; } -static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; esw_debug(esw->dev, "Destroy FDB Table\n"); - mlx5_destroy_flow_group(esw->fdb_table.promisc_grp); - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.fdb); esw->fdb_table.fdb = NULL; - esw->fdb_table.addr_grp = NULL; - esw->fdb_table.allmulti_grp = NULL; - esw->fdb_table.promisc_grp = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; } /* E-Switch vport UC/MC lists management */ @@ -1540,7 +1540,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vpo
[PATCH net-next V3 00/16] Mellanox 100G SRIOV E-Switch offload and VF representors
d up with few internal callbacks that should be implemented by a profile instance. The profile for the conventional NIC is implemented, to preserve the existing functionality. The last two patches add e-switch registration API for the VF representors and the implementation of the VF representors netdevice profile. Being an mlx5e instance, the VF representor uses HW send/recv queues, completions queues and such. It currently doesn't support NIC offloads but some of them could be added later on. The VF representor has switchdev ops, where currently the only supported API is the one to the HW ID, which is needed to identify multiple representors belonging to the same e-switch. The architecture + solution (software and firmware) work were done by a team consisting of Ilya Lesokhin, Haggai Eran, Rony Efraim, Tal Anker, Natan Oppenheimer, Saeed Mahameed, Hadar and Or, thanks you all! v1 --> v2 fixes: * removed unneeded variable (patch #3) * removed unused value DEVLINK_ESWITCH_MODE_NONE (patch #8) * changed the devlink mode name from "offloads" to "switchdev" which better describes what are we referring here, using a known concept (patch #8) * correctly refer to devlink e-switch modes (patch #10) * use the correct mlx5e way to define the VF rep statistics (patch #16) v2 --> v3 fixes: * Rebased on top 6fde0e63eccb 'be2net: signedness bug in be_msix_enable()' * Handled compilation error introduced by rebase on top "f5074d0ce2f8 Merge branch 'mlx5-100G-fixes'" * This series applies perfectly even with 'mlx5 resiliency and xmit path fixes' merged to net-next Thanks, Or & Saeed. Hadar Hen Zion (6): net/mlx5e: Create NIC global resources only once net/mlx5e: TIRs management refactoring net/mlx5e: Mark enabled RQTs instances explicitly net/mlx5e: Add support for multiple profiles net/mlx5: Add Representors registration API net/mlx5e: Introduce SRIOV VF representors Or Gerlitz (10): net/mlx5: E-Switch, Add operational mode to the SRIOV e-Switch net/mlx5: E-Switch, Add support for the sriov offloads mode net/mlx5: E-Switch, Add miss rule for offloads mode net/mlx5: E-Switch, Add API to create send-to-vport rules net/mlx5: Introduce offloads steering namespace net/mlx5: E-Switch, Add offloads table net/mlx5: E-Switch, Add API to create vport rx rules net/devlink: Add E-Switch mode control net/mlx5: Add devlink interface net/mlx5e: Add devlink based SRIOV mode changes drivers/net/ethernet/mellanox/mlx5/core/Kconfig| 1 + drivers/net/ethernet/mellanox/mlx5/core/Makefile | 8 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 73 ++- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 14 +- .../net/ethernet/mellanox/mlx5/core/en_common.c| 160 ++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c| 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 627 - drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 394 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 90 +-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 78 ++- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 566 +++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 11 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 26 +- drivers/net/ethernet/mellanox/mlx5/core/sriov.c| 5 +- include/linux/mlx5/driver.h| 13 + include/linux/mlx5/fs.h| 1 + include/net/devlink.h | 3 + include/uapi/linux/devlink.h | 8 + net/core/devlink.c | 87 +++ 20 files changed, 1840 insertions(+), 331 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c -- 2.8.0
[PATCH net-next V3 03/16] net/mlx5: E-Switch, Add miss rule for offloads mode
From: Or Gerlitz <ogerl...@mellanox.com> In the sriov offloads mode, packets that are not matched by any other rule should be sent towards the e-switch manager for further processing. Add such "miss" rule which matches ANY packet as the last rule in the e-switch FDB and programs the HW to send the packet to vport 0 where the e-switch manager runs. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 40 ++ 2 files changed, 41 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2360180..8eed33f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -144,6 +144,7 @@ struct mlx5_eswitch_fdb { struct offloads_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; + struct mlx5_flow_rule *miss_rule; } offloads; }; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c6b28df..e3d81ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,6 +38,39 @@ #include "mlx5_core.h" #include "eswitch.h" +static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v, *match_c; + int err = 0; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = 0; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, 0, match_c, match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, ); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); + goto out; + } + + esw->fdb_table.offloads.miss_rule = flow_rule; +out: + kfree(match_v); + kfree(match_c); + return err; +} + #define MAX_PF_SQ 256 int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) @@ -110,8 +143,14 @@ int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.miss_grp = g; + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + return 0; +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: @@ -128,6 +167,7 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) return; esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); -- 2.8.0
[PATCH net-next V3 11/16] net/mlx5e: Create NIC global resources only once
From: Hadar Hen Zion <had...@mellanox.com> To allow creating more than one netdev over the same PCI function, we change the driver such that global NIC resources are created once and later be shared amongst all the mlx5e netdevs running over that port. Move the CQ UAR, PD (pdn), Transport Domain (tdn), MKey resources from being kept in the mlx5e priv part to a new resources structure (mlx5e_resources) placed under the mlx5_core device. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Reviewed-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 6 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +- .../net/ethernet/mellanox/mlx5/core/en_common.c| 112 +++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 124 +++-- include/linux/mlx5/driver.h| 13 +++ 5 files changed, 171 insertions(+), 90 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 96f1826..9b14dad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -6,8 +6,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o rl.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ - en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \ - en_arfs.o + en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ + en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_tc.o en_arfs.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b97511b..3226b92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -570,10 +570,6 @@ struct mlx5e_priv { unsigned long state; struct mutex state_lock; /* Protects Interface state */ - struct mlx5_uarcq_uar; - u32pdn; - u32tdn; - struct mlx5_core_mkey mkey; struct mlx5_core_mkey umr_mkey; struct mlx5e_rqdrop_rq; @@ -788,5 +784,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, #endif u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c new file mode 100644 index 000..33b3732 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + *copyright notice, this list of conditions and the following + *disclaimer. + * + * - Redistributions in binary form must reproduce the above + *copyright notice, this list of conditions and the following + *disclaimer in the documentation and/or other materials + *provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* mlx5e global resources should be placed in this file. + * Global resources are common to all the netdevices crated on the same nic. + */ + +static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, +struct mlx5_core_mkey *mkey) +{ + struct m
[PATCH net-next V3 09/16] net/mlx5: Add devlink interface
From: Or Gerlitz <ogerl...@mellanox.com> The devlink interface is initially used to set/get the mode of the SRIOV e-switch. Currently, these are only stubs for get/set, down-stream patch will actually fill them out. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig| 1 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 4 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 + drivers/net/ethernet/mellanox/mlx5/core/main.c | 26 ++ 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 1cf722e..aae4688 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -4,6 +4,7 @@ config MLX5_CORE tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" + depends on MAY_USE_DEVLINK depends on PCI default n ---help--- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index cf959f7..7843f98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -35,6 +35,7 @@ #include #include +#include #include #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -205,6 +206,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7aad367..e1727a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -328,3 +328,13 @@ out: kfree(match_c); return flow_rule; } + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + return -EOPNOTSUPP; +} + +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1f3b6d6..1fb3c68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -51,6 +51,7 @@ #ifdef CONFIG_RFS_ACCEL #include #endif +#include #include "mlx5_core.h" #include "fs_core.h" #ifdef CONFIG_MLX5_CORE_EN @@ -1315,19 +1316,28 @@ struct mlx5_core_event_handler { void *data); }; +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_CORE_EN + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, +#endif +}; static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; + struct devlink *devlink; struct mlx5_priv *priv; int err; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { + devlink = devlink_alloc(_devlink_ops, sizeof(*dev)); + if (!devlink) { dev_err(>dev, "kzalloc failed\n"); return -ENOMEM; } + + dev = devlink_priv(devlink); priv = >priv; priv->pci_dev_data = id->driver_data; @@ -1364,15 +1374,21 @@ static int init_one(struct pci_dev *pdev, goto clean_health; } + err = devlink_register(devlink, >dev); + if (err) + goto clean_load; + return 0; +clean_load: + mlx5_unload_one(dev, priv); clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); clean_dev: pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); return err; } @@ -1380,8 +1396,10 @@ clean_dev: static void remove_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = >priv; + devlink_unregister(devlink); if (mlx5_unload_one(dev, priv)) { dev_err(>pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); @@ -1390,7 +1408,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(p
[PATCH net-next V3 02/16] net/mlx5: E-Switch, Add support for the sriov offloads mode
From: Or Gerlitz <ogerl...@mellanox.com> Unlike the legacy mode, here, forwarding rules are not learned by the driver per events on macs set by VFs/VMs into their vports, but rather should be programmed by higher-level SW entities. Saying that, still, in the offloads mode (SRIOV_OFFLOADS), two flow groups are created by the driver for management (slow path) purposes: The first group will be used for sending packets over e-switch vports from the host OS where the e-switch management code runs, to be received by VFs. The second group will be used by a miss rule which forwards packets toward the e-switch manager. Further logic will trap these packets such that the receiving net-device as seen by the networking stack is the representor of the vport that sent the packet over the e-switch data-path. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 35 +++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 16 +++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 135 + 4 files changed, 168 insertions(+), 20 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index c4f450f..96f1826 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -5,7 +5,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \ en_arfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8068dde..1fc4cfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -40,17 +40,6 @@ #define UPLINK_VPORT 0x -#define MLX5_DEBUG_ESWITCH_MASK BIT(3) - -#define esw_info(dev, format, ...) \ - pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_warn(dev, format, ...) \ - pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_debug(dev, format, ...)\ - mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) - enum { MLX5_ACTION_NONE = 0, MLX5_ACTION_ADD = 1, @@ -92,6 +81,9 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) +int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports); +void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw); + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -578,7 +570,8 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) if (err) goto abort; - if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + /* SRIOV is enabled: Forward UC MAC to vport */ + if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", @@ -1543,7 +1536,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; - int i; + int i, enabled_events; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -1562,18 +1555,19 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); - if (mode != SRIOV_LEGACY) - return -EINVAL; - esw->mode = mode; esw_disable_vport(esw, 0); - err = esw_create_legacy_fdb_table(esw, nvfs + 1); + if (mode == SRIOV_LEGACY) + err = esw_create_legacy_fdb_table(esw, nvfs + 1); + else + err = esw_create_offloads_fdb_table(esw, nvfs + 1); if (err) goto abort; +
[PATCH net-next V3 08/16] net/devlink: Add E-Switch mode control
From: Or Gerlitz <ogerl...@mellanox.com> Add the commands to set and show the mode of SRIOV E-Switch, two modes are supported: * legacy: operating in the "old" L2 based mode (DMAC --> VF vport) * switchdev: the E-Switch is referred to as whitebox switch configured using standard tools such as tc, bridge, openvswitch etc. To allow working with the tools, for each VF, a VF representor netdevice is created by the E-Switch manager vendor device driver instance (e.g PF). Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- include/net/devlink.h| 3 ++ include/uapi/linux/devlink.h | 8 net/core/devlink.c | 87 3 files changed, 98 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index 1d45b61..c99ffe8 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -90,6 +90,9 @@ struct devlink_ops { u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); + + int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ba0073b..915bfa7 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -57,6 +57,8 @@ enum devlink_command { DEVLINK_CMD_SB_OCC_SNAPSHOT, DEVLINK_CMD_SB_OCC_MAX_CLEAR, + DEVLINK_CMD_ESWITCH_MODE_GET, + DEVLINK_CMD_ESWITCH_MODE_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, @@ -95,6 +97,11 @@ enum devlink_sb_threshold_type { #define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20 +enum devlink_eswitch_mode { + DEVLINK_ESWITCH_MODE_LEGACY, + DEVLINK_ESWITCH_MODE_SWITCHDEV, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -125,6 +132,7 @@ enum devlink_attr { DEVLINK_ATTR_SB_TC_INDEX, /* u16 */ DEVLINK_ATTR_SB_OCC_CUR,/* u32 */ DEVLINK_ATTR_SB_OCC_MAX,/* u32 */ + DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 933e8d4..b2e592a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1394,6 +1394,78 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, return -EOPNOTSUPP; } +static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, u16 mode) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, _nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + struct sk_buff *msg; + u16 mode; + int err; + + if (!ops || !ops->eswitch_mode_get) + return -EOPNOTSUPP; + + err = ops->eswitch_mode_get(devlink, ); + if (err) + return err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET, + info->snd_portid, info->snd_seq, 0, mode); + + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + u16 mode; + + if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) + return -EINVAL; + + mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + + if (ops && ops->eswitch_mode_set) + return ops->eswitch_mode_set(devlink, mode); + return -EOPNOTSUPP;
[PATCH net-next V3 16/16] net/mlx5e: Introduce SRIOV VF representors
From: Hadar Hen Zion <had...@mellanox.com> Implement the relevant profile functions to create mlx5e driver instance serving as VF representor. When SRIOV offloads mode is enabled, each VF will have a representor netdevice instance on the host. To do that, we also export set of shared service functions from en_main.c, such that they can be used by both NIC and repsresentors netdevs. The newly created representor netdevice has a basic set of net_device_ops which are the same ndo functions as the NIC netdevice and an ndo of it's own for phys port name. The profiling infrastructure allow sharing code between the NIC and the vport representor even though the representor has only a subset of the NIC functionality. The VF reps and the PF which is used in that mode to represent the uplink, expose switchdev ops. Currently the only op supposed is attr get for the port parent ID which here serves to identify net-devices belonging to the same HW E-Switch. Other than that, no offloading is implemented and hence switching functionality is achieved if one sets SW switching rules, e.g using tc, bridge or ovs. Port phys name (ndo_get_phys_port_name) is implemented to allow exporting to user-space the VF vport number and along with the switchdev port parent id (phys_switch_id) enable a udev base consistent naming scheme: SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="", \ ATTR{phys_port_name}!="", NAME="$PF_NIC$attr{phys_port_name}" where phys_switch_id is exposed by the PF (and VF reps) and $PF_NIC is the name of the PF netdevice. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 28 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 53 ++- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 394 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 20 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 96 - 6 files changed, 574 insertions(+), 19 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9b14dad..a574dea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -8,6 +8,6 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ - en_tc.o en_arfs.o + en_tc.o en_arfs.o en_rep.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 081259a..00643a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -44,6 +44,7 @@ #include #include #include +#include #include "wq.h" #include "mlx5_core.h" #include "en_stats.h" @@ -816,4 +817,31 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev); +struct mlx5_eswitch_rep; +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, +struct mlx5_eswitch_rep *rep); +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +int mlx5e_close(struct net_device *netdev); +int mlx5e_open(struct net_device *netdev); +void mlx5e_update_stats_work(struct work_struct *work); +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv); +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +struct rtnl_link_stats64 * +mlx5e_get_stats(struct n
[PATCH net-next V3 14/16] net/mlx5e: Add support for multiple profiles
From: Hadar Hen Zion <had...@mellanox.com> To allow support in representor netdevices where we create more than one netdevice per NIC, add profiles to the mlx5e driver. The profiling allows for creation of mlx5e instances with different characteristics. Each profile implements its own behavior using set of function pointers defined in struct mlx5e_profile. This is done to allow for avoiding complex per profix branching in the code. Currently only the profile for the conventional NIC is implemented, which is of use when a netdev is created upon pci probe. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion <had...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 17 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 341 ++ 2 files changed, 240 insertions(+), 118 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 91c6bbe..edfc9be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -568,6 +568,22 @@ enum { MLX5E_NIC_PRIO }; +struct mlx5e_profile { + void(*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile); + void(*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void(*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void(*cleanup_tx)(struct mlx5e_priv *priv); + void(*enable)(struct mlx5e_priv *priv); + void(*disable)(struct mlx5e_priv *priv); + void(*update_stats)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + int max_tc; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq**txq_to_sq_map; @@ -601,6 +617,7 @@ struct mlx5e_priv { struct mlx5e_stats stats; struct mlx5e_tstamptstamp; u16 q_counter; + const struct mlx5e_profile *profile; }; enum mlx5e_link_mode { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7f1f1ec..3e22c5e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -233,7 +233,7 @@ static void mlx5e_update_stats_work(struct work_struct *work) update_stats_work); mutex_lock(>state_lock); if (test_bit(MLX5E_STATE_OPENED, >state)) { - mlx5e_update_stats(priv); + priv->profile->update_stats(priv); queue_delayed_work(priv->wq, dwork, msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL)); } @@ -1036,7 +1036,7 @@ static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) { int i; - for (i = 0; i < MLX5E_MAX_NUM_TC; i++) + for (i = 0; i < priv->profile->max_tc; i++) priv->channeltc_to_txq_map[ix][i] = ix + i * priv->params.num_channels; } @@ -1524,21 +1524,20 @@ static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } -static int mlx5e_create_rqts(struct mlx5e_priv *priv) +static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) +{ + struct mlx5e_rqt *rqt = >indir_rqt; + + return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); +} + +static int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); struct mlx5e_rqt *rqt; int err; int ix; - /* Indirect RQT */ - rqt = >indir_rqt; - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); - if (err) - return err; - - /* Direct RQTs */ - for (ix = 0; ix < nch; ix++) { + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { rqt = >direct_tir[ix].rqt; err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); if (err) @@ -1551,22 +1550,9 @@ err_destroy_rqts: for (ix--; ix >= 0; ix--) mlx5e_destroy_rqt(priv, >direct_tir[ix].rqt); - mlx5e_destroy_rqt(priv, >indir_rqt); - return err; } -static void mlx5e_destroy_rqts(struct mlx5e_priv *priv) -{ - int nch = mlx5e_get_max_num_channels(priv->mdev); - int i; - - for (i = 0; i < nch; i++) - mlx5e_destroy_rqt(priv, >direct_tir[i].rqt); - - mlx5e_destroy_rqt(priv, >indir_rqt); -} - int mlx5e_redirect_rqt(struct mlx5e_pri
[PATCH net-next V3 07/16] net/mlx5: E-Switch, Add API to create vport rx rules
From: Or Gerlitz <ogerl...@mellanox.com> Add the API to create vport rx rules of the form packet meta-data :: vport == $VPORT --> $TIR where the TIR is opened by this VF representor. This logic will by used for packets that didn't match any rule in the e-switch datapath and should be received into the host OS through the netdevice that represents the VF they were sent from. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 4 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 85 ++ 2 files changed, 89 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 32db37a..cf959f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -157,6 +157,7 @@ enum { struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_group *vport_rx_group; }; struct mlx5_eswitch { @@ -201,6 +202,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e895c6f..7aad367 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -243,3 +243,88 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(offloads->ft_offloads); } + +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + struct mlx5_priv *priv = >dev->priv; + u32 *flow_group_in; + void *match_criteria, *misc; + int err = 0; + int nvports = priv->sriov.num_vfs + 2; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + /* create vport rx group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, +MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_group = g; +out: + kfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_group(esw->offloads.vport_rx_group); +} + +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + int match_header = MLX5_MATCH_MISC_PARAMETERS; + u32 *match_v, *match_c; + void *misc; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + esw_warn(esw->dev, "Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tirn; + + flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, match_header, match_c, + match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, ); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); +
[PATCH net-next V3 06/16] net/mlx5: E-Switch, Add offloads table
From: Or Gerlitz <ogerl...@mellanox.com> Belongs to the NIC offloads name-space, and to be used as part of the SRIOV offloads logic to steer packets that hit the e-switch miss rule to the TIR of the relevant VF representor. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 5 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 31 ++ 2 files changed, 36 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b7fabd1..32db37a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -155,6 +155,10 @@ enum { SRIOV_OFFLOADS }; +struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads; +}; + struct mlx5_eswitch { struct mlx5_core_dev*dev; struct mlx5_l2_tablel2_table; @@ -169,6 +173,7 @@ struct mlx5_eswitch { */ struct mutexstate_lock; struct esw_mc_addr *mc_promisc; + struct mlx5_esw_offload offloads; int mode; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8964f71..e895c6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -212,3 +212,34 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(esw->fdb_table.fdb); } + +static int esw_create_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft_offloads; + struct mlx5_core_dev *dev = esw->dev; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -ENOMEM; + } + + ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0); + if (IS_ERR(ft_offloads)) { + err = PTR_ERR(ft_offloads); + esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); + return err; + } + + esw->offloads.ft_offloads = ft_offloads; + return 0; +} + +static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = >offloads; + + mlx5_destroy_flow_table(offloads->ft_offloads); +} -- 2.8.0
[PATCH net-next V3 05/16] net/mlx5: Introduce offloads steering namespace
From: Or Gerlitz <ogerl...@mellanox.com> Add a new namespace (MLX5_FLOW_NAMESPACE_OFFLOADS) to be populated with flow steering rules that deal with rules that have have to be executed before the EN NIC steering rules are matched. The namespace is located after the bypass name-space and before the kernel name-space. Therefore, it precedes the HW processing done for rules set for the kernel NIC name-space. Under SRIOV, it would allow us to match on e-switch missed packet and forward them to the relevant VF representor TIR. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Amir Vadai <a...@vadai.me> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 11 ++- include/linux/mlx5/fs.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e912a3d..b040110 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -83,6 +83,11 @@ #define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) + +#define OFFLOADS_MAX_FT 1 +#define OFFLOADS_NUM_PRIOS 1 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) + struct node_caps { size_t arr_sz; long*caps; @@ -98,7 +103,7 @@ static struct init_tree_node { int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 4, + .ar_size = 5, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), @@ -107,6 +112,9 @@ static struct init_tree_node { FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, +ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, ADD_NS(ADD_MULTIPLE_PRIO(1, 1), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, @@ -1369,6 +1377,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, switch (type) { case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_OFFLOADS: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: case MLX5_FLOW_NAMESPACE_ANCHOR: diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4b7a107..6ad1119 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_OFFLOADS, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, -- 2.8.0
[PATCH net-next V3 10/16] net/mlx5e: Add devlink based SRIOV mode changes
From: Or Gerlitz <ogerl...@mellanox.com> Implement handlers for the devlink commands to get and set the SRIOV E-Switch mode. When turning to the switchdev/offloads mode, we disable the e-switch and enable it again in the new mode, create the NIC offloads table and create VF reps. When turning to legacy mode, we remove the VF reps and the offloads table, and re-initiate the e-switch in it's legacy mode. The actual creation/removal of the VF reps is done in downstream patches. Signed-off-by: Or Gerlitz <ogerl...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 121 - 2 files changed, 124 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1fc4cfd..12f509c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -81,8 +81,8 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) -int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports); -void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) @@ -1561,7 +1561,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (mode == SRIOV_LEGACY) err = esw_create_legacy_fdb_table(esw, nvfs + 1); else - err = esw_create_offloads_fdb_table(esw, nvfs + 1); + err = esw_offloads_init(esw, nvfs + 1); if (err) goto abort; @@ -1581,6 +1581,7 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + int nvports; int i; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || @@ -1591,6 +1592,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw->enabled_vports, esw->mode); mc_promisc = esw->mc_promisc; + nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) esw_disable_vport(esw, i); @@ -1600,8 +1602,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (esw->mode == SRIOV_LEGACY) esw_destroy_legacy_fdb_table(esw); - else - esw_destroy_offloads_fdb_table(esw); + else if (esw->mode == SRIOV_OFFLOADS) + esw_offloads_cleanup(esw, nvports); esw->mode = SRIOV_NONE; /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e1727a9..312b6f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -112,7 +112,7 @@ out: #define MAX_PF_SQ 256 -int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -200,7 +200,7 @@ ns_err: return err; } -void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; @@ -329,12 +329,125 @@ out: return flow_rule; } +static int esw_offloads_start(struct mlx5_eswitch *esw) +{ + int err, num_vfs = esw->dev->priv.sriov.num_vfs; + + if (esw->mode != SRIOV_LEGACY) { + esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); + return -EINVAL; + } + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err) + esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + return err; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +{ + int err; + + err = esw_create_offloads_fdb_table(esw, nvports); + if (err) + return err; + + err = esw_create_offloads_table(esw); + if (err) + goto create_ft_err; + + err = esw_create_vport_rx_group(esw); + if (err) + goto create_fg_err; + + return 0; + +create_fg_err: + esw_destroy_offloads_table(esw); + +create_ft_err: + esw_destroy_offloads_f
Re: [PATCH net 00/13] Mellanox 100G mlx5 resiliency and xmit path fixes
On Fri, Jul 1, 2016 at 1:14 PM, David Miller <da...@davemloft.net> wrote: > From: Saeed Mahameed <sae...@mellanox.com> > Date: Thu, 30 Jun 2016 17:34:37 +0300 > >> This series provides two set of fixes to the mlx5 driver: >> - Resiliency fixes for reset flow and internal pci errors >> - xmit path fixes > > Series applied to 'net' but expecting all of this to be backported > to -stable is unreasonable. > Thanks Dave, One small comment on this series is that it will hit two trivial conflicts once net is merged into current net-next. Conflict applying: "net/mlx5e: Timeout if SQ doesn't flush during close": Fix: --- @@@ -810,12 -802,19 +820,19 @@@ static void mlx5e_close_sq(struct mlx5e if (mlx5e_sq_has_room_for(sq, 1)) mlx5e_send_nop(sq, true); - mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR, - false, 0); - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, -MLX5_SQC_STATE_ERR); ++ err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR, ++false, 0); + if (err) + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, >state); --- Conflict applying: "net/mlx5e: Handle RQ flush in error cases" Fix: --- diff --cc drivers/net/ethernet/mellanox/mlx5/core/en.h index 6db979e,b429591..000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@@ -214,7 -191,7 +214,8 @@@ struct mlx5e_tstamp enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, +MLX5E_RQ_STATE_AM, + MLX5E_RQ_STATE_FLUSH_TIMEOUT, }; --- Thanks, Saeed