The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=bc531a1faa99b94b7b7761f1640304dd815eec5d

commit bc531a1faa99b94b7b7761f1640304dd815eec5d
Author:     Hans Petter Selasky <[email protected]>
AuthorDate: 2022-02-17 11:50:22 +0000
Commit:     Hans Petter Selasky <[email protected]>
CommitDate: 2022-02-17 12:13:09 +0000

    mlx5en: Improve CQE error debugging.
    
    MFC after:      1 week
    Sponsored by:   NVIDIA Networking
---
 sys/dev/mlx5/mlx5_en/en.h           |  2 ++
 sys/dev/mlx5/mlx5_en/mlx5_en_rx.c   |  1 +
 sys/dev/mlx5/mlx5_en/mlx5_en_tx.c   |  4 +++-
 sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c | 25 +++++++++++++++++++++++++
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index fa355c68831e..36a55ff5c4d0 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -1205,6 +1205,8 @@ int       mlx5e_open_locked(struct ifnet *);
 int    mlx5e_close_locked(struct ifnet *);
 
 void   mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event);
+void   mlx5e_dump_err_cqe(struct mlx5e_cq *, u32, const struct mlx5_err_cqe *);
+
 mlx5e_cq_comp_t mlx5e_rx_cq_comp;
 mlx5e_cq_comp_t mlx5e_tx_cq_comp;
 struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
index 0e3a3b3917f4..3c8813190f76 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -495,6 +495,7 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
                    BUS_DMASYNC_POSTREAD);
 
                if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+                       mlx5e_dump_err_cqe(&rq->cq, rq->rqn, (const void *)cqe);
                        rq->stats.wqe_err++;
                        goto wq_ll_pop;
                }
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
index 9e0837a76393..78458ab69f13 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -1045,8 +1045,10 @@ mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
                mlx5_cqwq_pop(&sq->cq.wq);
 
                /* check if the completion event indicates an error */
-               if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
+               if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+                       mlx5e_dump_err_cqe(&sq->cq, sq->sqn, (const void *)cqe);
                        sq->stats.cqe_err++;
+               }
 
                /* setup local variables */
                sqcc_this = be16toh(cqe->wqe_counter);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
index 9f5e17ad864e..aff247f5aea2 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
@@ -28,6 +28,8 @@
 #include "opt_rss.h"
 #include "opt_ratelimit.h"
 
+#include <linux/printk.h>
+
 #include <dev/mlx5/mlx5_en/en.h>
 
 struct mlx5_cqe64 *
@@ -54,3 +56,26 @@ mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event)
        mlx5_en_err(cq->priv->ifp, "cqn=0x%.6x event=0x%.2x\n",
            mcq->cqn, event);
 }
+
+void
+mlx5e_dump_err_cqe(struct mlx5e_cq *cq, u32 qn, const struct mlx5_err_cqe *err_cqe)
+{
+       u32 ci;
+
+       /* Don't print flushed in error syndromes. */
+       if (err_cqe->vendor_err_synd == 0xf9 && err_cqe->syndrome == 0x05)
+               return;
+       /* Don't print when the queue is set to error state by software. */
+       if (err_cqe->vendor_err_synd == 0xf5 && err_cqe->syndrome == 0x05)
+               return;
+
+       ci = (cq->wq.cc - 1) & cq->wq.sz_m1;
+
+       mlx5_en_err(cq->priv->ifp,
+           "Error CQE on CQN 0x%x, CI 0x%x, QN 0x%x, OPCODE 0x%x, SYNDROME 0x%x, VENDOR SYNDROME 0x%x\n",
+           cq->mcq.cqn, ci, qn, err_cqe->op_own >> 4,
+           err_cqe->syndrome, err_cqe->vendor_err_synd);
+
+       print_hex_dump(NULL, NULL, DUMP_PREFIX_OFFSET,
+           16, 1, err_cqe, sizeof(*err_cqe), false);
+}

Reply via email to