From: Carolina Jubran <cjub...@nvidia.com>

Introduce support for managing Traffic Class (TC) arbiter nodes and
associated vports TC nodes within the E-Switch QoS hierarchy. This
patch adds support for the new scheduling node type,
`SCHED_NODE_TYPE_VPORTS_TC_TSAR`, and implements full support for
setting tc-bw on both vports and nodes.

Key changes include:

- Introduced the new scheduling node type,
  `SCHED_NODE_TYPE_VPORTS_TC_TSAR`, for managing vports within the TC
  arbiter node.

- Added helper functions for creating and destroying vports TC nodes
  under the TC arbiter.

- Updated the minimum rate normalization function to skip nodes of type
  `SCHED_NODE_TYPE_VPORTS_TC_TSAR`. Vports TC TSARs have bandwidth
  shares configured on them but not minimum rates, so their `min_rate`
  cannot be normalized.

- Implemented `esw_qos_tc_arbiter_scheduling_setup()` and
  `esw_qos_tc_arbiter_scheduling_teardown()`, which create and destroy
  the TC arbiter scheduling element together with its vports TC nodes.
  These functions now fully support tc-bw configuration on TC arbiter
  nodes.

- Introduced a new helper `esw_qos_calculate_tc_bw_divider()` to
  compute the total TC bandwidth share, which is used as a divider for
  normalizing each TC's share.

- Added `esw_qos_tc_arbiter_get_bw_shares()` and
  `esw_qos_set_tc_arbiter_bw_shares()` to read back and apply the
  bandwidth shares of the vports traffic class TSARs.

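- `esw_qos_set_tc_arbiter_bw_shares()` normalizes each TC share based
  on the total and the firmware's maximum allowed TSAR bandwidth share:
  each share is scaled to tc_bw * fw_max / total (rounded up) and
  clamped to the range [MLX5_MIN_BW_SHARE, fw_max].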

- Refactored `mlx5_esw_devlink_rate_node_tc_bw_set()` and
  `mlx5_esw_devlink_rate_leaf_tc_bw_set()` to fully support configuring
  tc-bw on devlink rate nodes and vports, respectively.

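- Refactored `mlx5_esw_qos_node_update_parent()` so that setting a
  parent on a rate node remains possible when tc-bw is configured: the
  TC arbiter is torn down and rebuilt under the new parent with its TC
  bandwidth shares restored, and the parent validation accounts for
  the additional vports TC level in the hierarchy depth check.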

- Refactored `esw_qos_calc_bw_share()` to generalize its input so it
  can be used for both minimum rate and bandwidth share calculations.

Signed-off-by: Carolina Jubran <cjub...@nvidia.com>
Reviewed-by: Cosmin Ratiu <cra...@nvidia.com>
Signed-off-by: Tariq Toukan <tar...@nvidia.com>
Signed-off-by: Mark Bloch <mbl...@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 294 +++++++++++++++++-
 1 file changed, 285 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index dec3bed682b7..154bbb17ec0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -67,6 +67,7 @@ enum sched_node_type {
        SCHED_NODE_TYPE_TC_ARBITER_TSAR,
        SCHED_NODE_TYPE_RATE_LIMITER,
        SCHED_NODE_TYPE_VPORT_TC,
+       SCHED_NODE_TYPE_VPORTS_TC_TSAR,
 };
 
 static const char * const sched_node_type_str[] = {
@@ -75,6 +76,7 @@ static const char * const sched_node_type_str[] = {
        [SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR",
        [SCHED_NODE_TYPE_RATE_LIMITER] = "Rate Limiter",
        [SCHED_NODE_TYPE_VPORT_TC] = "vport TC",
+       [SCHED_NODE_TYPE_VPORTS_TC_TSAR] = "vports TC TSAR",
 };
 
 struct mlx5_esw_sched_node {
@@ -187,6 +189,11 @@ mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
 static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, 
const char *op)
 {
        switch (node->type) {
+       case SCHED_NODE_TYPE_VPORTS_TC_TSAR:
+               esw_warn(node->esw->dev,
+                        "E-Switch %s %s scheduling element failed 
(tc=%d,err=%d)\n",
+                        op, sched_node_type_str[node->type], node->tc, err);
+               break;
        case SCHED_NODE_TYPE_VPORT_TC:
                esw_warn(node->esw->dev,
                         "E-Switch %s %s scheduling element failed 
(vport=%d,tc=%d,err=%d)\n",
@@ -345,11 +352,13 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
        return 0;
 }
 
-static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
+static u32 esw_qos_calc_bw_share(u32 value, u32 divider, u32 fw_max)
 {
        if (!divider)
                return 0;
-       return min_t(u32, max_t(u32, DIV_ROUND_UP(min_rate, divider), 
MLX5_MIN_BW_SHARE), fw_max);
+       return min_t(u32, fw_max,
+                    max_t(u32,
+                          DIV_ROUND_UP(value, divider), MLX5_MIN_BW_SHARE));
 }
 
 static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node 
*node,
@@ -376,7 +385,13 @@ static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
                if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
                        continue;
 
-               esw_qos_update_sched_node_bw_share(node, divider, extack);
+               /* Vports TC TSARs don't have a minimum rate configured,
+                * so there's no need to update the bw_share on them.
+                */
+               if (node->type != SCHED_NODE_TYPE_VPORTS_TC_TSAR) {
+                       esw_qos_update_sched_node_bw_share(node, divider,
+                                                          extack);
+               }
 
                if (list_empty(&node->children))
                        continue;
@@ -385,6 +400,20 @@ static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
        }
 }
 
+static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw)
+{
+       u32 total = 0;
+       int i;
+
+       for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++)
+               total += tc_bw[i];
+
+       /* If total is zero, tc-bw config is disabled and we shouldn't reach
+        * here.
+        */
+       return WARN_ON(!total) ? 1 : total;
+}
+
 static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
                                     u32 min_rate, struct netlink_ext_ack 
*extack)
 {
@@ -527,6 +556,149 @@ static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlin
        __esw_qos_free_node(node);
 }
 
+static int esw_qos_create_vports_tc_node(struct mlx5_esw_sched_node *parent,
+                                        u8 tc, struct netlink_ext_ack *extack)
+{
+       u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       struct mlx5_core_dev *dev = parent->esw->dev;
+       struct mlx5_esw_sched_node *vports_tc_node;
+       void *attr;
+       int err;
+
+       if (!mlx5_qos_element_type_supported(
+               dev,
+               SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
+               SCHEDULING_HIERARCHY_E_SWITCH) ||
+           !mlx5_qos_tsar_type_supported(dev,
+                                         TSAR_ELEMENT_TSAR_TYPE_DWRR,
+                                         SCHEDULING_HIERARCHY_E_SWITCH))
+               return -EOPNOTSUPP;
+
+       vports_tc_node = __esw_qos_alloc_node(parent->esw, 0,
+                                             SCHED_NODE_TYPE_VPORTS_TC_TSAR,
+                                             parent);
+       if (!vports_tc_node) {
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
+               esw_warn(dev, "Failed to alloc vports TC node (tc=%d)\n", tc);
+               return -ENOMEM;
+       }
+
+       attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+       MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
+       MLX5_SET(tsar_element, attr, traffic_class, tc);
+       MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent->ix);
+       MLX5_SET(scheduling_context, tsar_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+       err = esw_qos_node_create_sched_element(vports_tc_node, tsar_ctx,
+                                               extack);
+       if (err)
+               goto err_create_sched_element;
+
+       vports_tc_node->tc = tc;
+
+       return 0;
+
+err_create_sched_element:
+       __esw_qos_free_node(vports_tc_node);
+       return err;
+}
+
+static void
+esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
+                                u32 *tc_bw)
+{
+       struct mlx5_esw_sched_node *vports_tc_node;
+
+       list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry)
+               tc_bw[vports_tc_node->tc] = vports_tc_node->bw_share;
+}
+
+static void
+esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
+                                u32 *tc_bw, struct netlink_ext_ack *extack)
+{
+       struct mlx5_eswitch *esw = tc_arbiter_node->esw;
+       struct mlx5_esw_sched_node *vports_tc_node;
+       u32 divider, fw_max_bw_share;
+
+       fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+       divider = esw_qos_calculate_tc_bw_divider(tc_bw);
+       list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
+               u8 tc = vports_tc_node->tc;
+               u32 bw_share;
+
+               bw_share = tc_bw[tc] * fw_max_bw_share;
+               bw_share = esw_qos_calc_bw_share(bw_share, divider,
+                                                fw_max_bw_share);
+               esw_qos_sched_elem_config(vports_tc_node, 0, bw_share, extack);
+       }
+}
+
+static void
+esw_qos_destroy_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
+                               struct netlink_ext_ack *extack)
+{
+       struct mlx5_esw_sched_node *vports_tc_node, *tmp;
+
+       list_for_each_entry_safe(vports_tc_node, tmp,
+                                &tc_arbiter_node->children, entry)
+               esw_qos_destroy_node(vports_tc_node, extack);
+}
+
+static int
+esw_qos_create_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
+                              struct netlink_ext_ack *extack)
+{
+       struct mlx5_eswitch *esw = tc_arbiter_node->esw;
+       int err, i, num_tcs = esw_qos_num_tcs(esw->dev);
+
+       for (i = 0; i < num_tcs; i++) {
+               err = esw_qos_create_vports_tc_node(tc_arbiter_node, i, extack);
+               if (err)
+                       goto err_tc_node_create;
+       }
+
+       return 0;
+
+err_tc_node_create:
+       esw_qos_destroy_vports_tc_nodes(tc_arbiter_node, NULL);
+       return err;
+}
+
+static int esw_qos_create_tc_arbiter_sched_elem(
+               struct mlx5_esw_sched_node *tc_arbiter_node,
+               struct netlink_ext_ack *extack)
+{
+       u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       u32 tsar_parent_ix;
+       void *attr;
+
+       if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev,
+                                         TSAR_ELEMENT_TSAR_TYPE_TC_ARB,
+                                         SCHEDULING_HIERARCHY_E_SWITCH)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "E-Switch TC Arbiter scheduling element is 
not supported");
+               return -EOPNOTSUPP;
+       }
+
+       attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+       MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB);
+       tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix :
+                        tc_arbiter_node->esw->qos.root_tsar_ix;
+       MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
+                tsar_parent_ix);
+       MLX5_SET(scheduling_context, tsar_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+       MLX5_SET(scheduling_context, tsar_ctx, max_average_bw,
+                tc_arbiter_node->max_rate);
+       MLX5_SET(scheduling_context, tsar_ctx, bw_share,
+                tc_arbiter_node->bw_share);
+
+       return esw_qos_node_create_sched_element(tc_arbiter_node, tsar_ctx,
+                                                extack);
+}
+
 static struct mlx5_esw_sched_node *
 __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct 
mlx5_esw_sched_node *parent,
                                   struct netlink_ext_ack *extack)
@@ -591,6 +763,9 @@ static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netl
 {
        struct mlx5_eswitch *esw = node->esw;
 
+       if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
+               esw_qos_destroy_vports_tc_nodes(node, extack);
+
        trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
        esw_qos_destroy_node(node, extack);
        esw_qos_normalize_min_rate(esw, NULL, extack);
@@ -685,13 +860,38 @@ static void esw_qos_put(struct mlx5_eswitch *esw)
 static void
 esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node,
                                       struct netlink_ext_ack *extack)
-{}
+{
+       /* Clean up all Vports TC nodes within the TC arbiter node. */
+       esw_qos_destroy_vports_tc_nodes(node, extack);
+       /* Destroy the scheduling element for the TC arbiter node itself. */
+       esw_qos_node_destroy_sched_element(node, extack);
+}
 
 static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node 
*node,
                                               struct netlink_ext_ack *extack)
 {
-       NL_SET_ERR_MSG_MOD(extack, "TC arbiter elements are not supported.");
-       return -EOPNOTSUPP;
+       u32 curr_ix = node->ix;
+       int err;
+
+       err = esw_qos_create_tc_arbiter_sched_elem(node, extack);
+       if (err)
+               return err;
+       /* Initialize the vports TC nodes within created TC arbiter TSAR. */
+       err = esw_qos_create_vports_tc_nodes(node, extack);
+       if (err)
+               goto err_vports_tc_nodes;
+
+       node->type = SCHED_NODE_TYPE_TC_ARBITER_TSAR;
+
+       return 0;
+
+err_vports_tc_nodes:
+       /* If initialization fails, clean up the scheduling element
+        * for the TC arbiter node.
+        */
+       esw_qos_node_destroy_sched_element(node, NULL);
+       node->ix = curr_ix;
+       return err;
 }
 
 static int
@@ -1064,6 +1264,7 @@ static int esw_qos_vport_update(struct mlx5_vport *vport,
 {
        struct mlx5_esw_sched_node *curr_parent = vport->qos.sched_node->parent;
        enum sched_node_type curr_type = vport->qos.sched_node->type;
+       u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
        int err;
 
        esw_assert_qos_lock_held(vport->dev->priv.eswitch);
@@ -1075,11 +1276,23 @@ static int esw_qos_vport_update(struct mlx5_vport *vport,
        if (err)
                return err;
 
+       if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
+               esw_qos_tc_arbiter_get_bw_shares(vport->qos.sched_node,
+                                                curr_tc_bw);
+       }
+
        esw_qos_vport_disable(vport, extack);
 
        err = esw_qos_vport_enable(vport, type, parent, extack);
-       if (err)
+       if (err) {
                esw_qos_vport_enable(vport, curr_type, curr_parent, NULL);
+               extack = NULL;
+       }
+
+       if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
+               esw_qos_set_tc_arbiter_bw_shares(vport->qos.sched_node,
+                                                curr_tc_bw, extack);
+       }
 
        return err;
 }
@@ -1563,6 +1776,8 @@ int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf,
                                           SCHED_NODE_TYPE_TC_ARBITER_TSAR,
                                           NULL, extack);
        }
+       if (!err)
+               esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack);
 unlock:
        esw_qos_unlock(esw);
        return err;
@@ -1592,6 +1807,8 @@ int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node,
        }
 
        err = esw_qos_node_enable_tc_arbitration(node, extack);
+       if (!err)
+               esw_qos_set_tc_arbiter_bw_shares(node, tc_bw, extack);
 unlock:
        esw_qos_unlock(esw);
        return err;
@@ -1716,6 +1933,20 @@ int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate,
        return mlx5_esw_qos_vport_update_parent(vport, node, extack);
 }
 
+static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node)
+{
+       if (list_empty(&node->children))
+               return true;
+
+       if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
+               return false;
+
+       node = list_first_entry(&node->children, struct mlx5_esw_sched_node,
+                               entry);
+
+       return esw_qos_is_node_empty(node);
+}
+
 static int
 mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
                                      struct mlx5_esw_sched_node *parent,
@@ -1729,13 +1960,26 @@ mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
                return -EOPNOTSUPP;
        }
 
-       if (!list_empty(&node->children)) {
+       if (!esw_qos_is_node_empty(node)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Cannot reassign a node that contains rate 
objects");
                return -EOPNOTSUPP;
        }
 
+       if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Cannot attach a node to a parent with TC 
bandwidth configured");
+               return -EOPNOTSUPP;
+       }
+
        new_level = parent ? parent->level + 1 : 2;
+       if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
+               /* Increase by one to account for the vports TC scheduling
+                * element.
+                */
+               new_level += 1;
+       }
+
        max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
        if (new_level > max_level) {
                NL_SET_ERR_MSG_MOD(extack,
@@ -1746,6 +1990,32 @@ mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
        return 0;
 }
 
+static int
+esw_qos_tc_arbiter_node_update_parent(struct mlx5_esw_sched_node *node,
+                                     struct mlx5_esw_sched_node *parent,
+                                     struct netlink_ext_ack *extack)
+{
+       struct mlx5_esw_sched_node *curr_parent = node->parent;
+       u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
+       struct mlx5_eswitch *esw = node->esw;
+       int err;
+
+       esw_qos_tc_arbiter_get_bw_shares(node, curr_tc_bw);
+       esw_qos_tc_arbiter_scheduling_teardown(node, extack);
+       esw_qos_node_set_parent(node, parent);
+       err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
+       if (err) {
+               esw_qos_node_set_parent(node, curr_parent);
+               if (esw_qos_tc_arbiter_scheduling_setup(node, extack)) {
+                       esw_warn(esw->dev, "Node restore QoS failed\n");
+                       return err;
+               }
+       }
+       esw_qos_set_tc_arbiter_bw_shares(node, curr_tc_bw, extack);
+
+       return err;
+}
+
 static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node,
                                             struct mlx5_esw_sched_node *parent,
                                             struct netlink_ext_ack *extack)
@@ -1791,7 +2061,13 @@ static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node,
 
        esw_qos_lock(esw);
        curr_parent = node->parent;
-       err = esw_qos_vports_node_update_parent(node, parent, extack);
+       if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
+               err = esw_qos_tc_arbiter_node_update_parent(node, parent,
+                                                           extack);
+       } else {
+               err = esw_qos_vports_node_update_parent(node, parent, extack);
+       }
+
        if (err)
                goto out;
 
-- 
2.34.1


Reply via email to