From: Eran Ben Elisha <era...@mellanox.com>

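Monitor and dump xmit error completions. Only the first error completion
seen on each send queue is dumped; subsequent ones are only counted.
In addition, add a cqe_err counter to track the number of error
completions per send queue, aggregated across queues as tx_cqe_err.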

Signed-off-by: Eran Ben Elisha <era...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c |  3 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 19 +++++++++++++++++++
 3 files changed, 24 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index c0dab9a8969e..ad91d9de0240 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -60,6 +60,7 @@ static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
@@ -153,6 +154,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv 
*priv)
                        s->tx_queue_stopped     += sq_stats->stopped;
                        s->tx_queue_wake        += sq_stats->wake;
                        s->tx_queue_dropped     += sq_stats->dropped;
+                       s->tx_cqe_err           += sq_stats->cqe_err;
                        s->tx_xmit_more         += sq_stats->xmit_more;
                        s->tx_csum_partial_inner += 
sq_stats->csum_partial_inner;
                        s->tx_csum_none         += sq_stats->csum_none;
@@ -1103,6 +1105,7 @@ static const struct counter_desc sq_stats_desc[] = {
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
 };
 
 static const struct counter_desc ch_stats_desc[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 43a72efa28c0..43dc808684c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -78,6 +78,7 @@ struct mlx5e_sw_stats {
        u64 tx_queue_wake;
        u64 tx_queue_dropped;
        u64 tx_xmit_more;
+       u64 tx_cqe_err;
        u64 rx_wqe_err;
        u64 rx_mpwqe_filler;
        u64 rx_buff_alloc_err;
@@ -197,6 +198,7 @@ struct mlx5e_sq_stats {
        u64 stopped;
        u64 wake;
        u64 dropped;
+       u64 cqe_err;
 };
 
 struct mlx5e_ch_stats {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 11b4f1089d1c..88b5b7bfc9a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -417,6 +417,18 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct 
net_device *dev)
        return mlx5e_sq_xmit(sq, skb, wqe, pi);
 }
 
+static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
+                                struct mlx5_err_cqe *err_cqe)
+{
+       u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
+
+       netdev_err(sq->channel->netdev,
+                  "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, 
vendor syndrome 0x%x\n",
+                  sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
+                  err_cqe->vendor_err_synd);
+       mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
+}
+
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
        struct mlx5e_txqsq *sq;
@@ -456,6 +468,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
                wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
+               if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+                       if (!sq->stats.cqe_err)
+                               mlx5e_dump_error_cqe(sq,
+                                                    (struct mlx5_err_cqe 
*)cqe);
+                       sq->stats.cqe_err++;
+               }
+
                do {
                        struct mlx5e_tx_wqe_info *wi;
                        struct sk_buff *skb;
-- 
2.14.3

Reply via email to