Distinguish between CQ catastrophic error and CQ overflow. The hardware knows which of the two happened, so propagate that information via the CQ notification rather than losing it in a trace message.
Signed-off-by: Fab Tillier <[email protected]> diff -dwup3 -x *svn* -r c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\ib\cq.c .\hw\mlx4\kernel\bus\ib\cq.c --- c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\ib\cq.c Thu Aug 02 16:47:19 2012 +++ .\hw\mlx4\kernel\bus\ib\cq.c Sat Jun 23 14:15:09 2012 @@ -34,7 +34,7 @@ #include "mlx4_ib.h" #include "cq.h" #include "qp.h" -#include "user.h" +#include "mx_abi.h" #if defined(EVENT_TRACING) #ifdef offsetof @@ -58,7 +58,13 @@ static void mlx4_ib_cq_event(struct mlx4 ib_event_rec_t event; struct ib_cq *ibcq; - if (type != MLX4_EVENT_TYPE_CQ_ERROR) { + if (type == MLX4_EVENT_TYPE_CQ_ERROR) { + event.type = IB_AE_CQ_ERROR; + } + else if (type == MLX4_EVENT_TYPE_CQ_OVERFLOW) { + event.type = IB_AE_CQ_OVERFLOW; + } + else { MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,( "mlx4_ib: Unexpected event type %d " "on CQ %06x\n", type, cq->cqn)); return; @@ -66,7 +72,6 @@ static void mlx4_ib_cq_event(struct mlx4 ibcq = &to_mibcq(cq)->ibcq; if (ibcq->event_handler) { - event.type = (ib_async_event_t)IB_EVENT_CQ_ERR; event.context = ibcq->cq_context; event.vendor_specific = type; ibcq->event_handler(&event); diff -dwup3 -x *svn* -r c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\inc\device.h .\hw\mlx4\kernel\bus\inc\device.h --- c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\inc\device.h Thu Mar 29 00:15:28 2012 +++ .\hw\mlx4\kernel\bus\inc\device.h Thu Jul 26 15:31:14 2012 @@ -111,6 +111,7 @@ enum mlx4_event { MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, MLX4_EVENT_TYPE_PORT_CHANGE = 0x09, MLX4_EVENT_TYPE_CMD = 0x0a, + MLX4_EVENT_TYPE_CQ_OVERFLOW = 0x0c, MLX4_EVENT_TYPE_ECC_DETECT = 0x0e, MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f, MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, @@ -120,11 +121,11 @@ enum mlx4_event { MLX4_EVENT_TYPE_SRQ_LIMIT = 0x14, MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18, MLX4_EVENT_TYPE_VEP_UPDATE = 0x19, + MLX4_EVENT_TYPE_OP_REQUIRED = 0x1a, MLX4_EVENT_TYPE_MAC_UPDATE 
= 0x20, MLX4_EVENT_TYPE_PPF_REMOVE = 0xf0, MLX4_EVENT_TYPE_SQP_UPDATE = 0xfe, - MLX4_EVENT_TYPE_NONE = 0xff, - MLX4_EVENT_TYPE_OP_REQUIRED = 0x1a + MLX4_EVENT_TYPE_NONE = 0xff }; enum { diff -dwup3 -x *svn* -r c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\net\eq.c .\hw\mlx4\kernel\bus\net\eq.c --- c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\net\eq.c Thu Mar 29 00:15:28 2012 +++ .\hw\mlx4\kernel\bus\net\eq.c Thu Jul 26 15:31:14 2012 @@ -521,18 +521,24 @@ static int mlx4_eq_int(struct mlx4_dev * break; case MLX4_EVENT_TYPE_CQ_ERROR: - MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,( "%s: CQ %s on CQN %06x\n", + if (eqe->event.cq_err.syndrome == 1) { + MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,( "%s: CQ overrun on CQN %06x\n", + dev->pdev->name, + be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff)); + eqe->type = MLX4_EVENT_TYPE_CQ_OVERFLOW; + } else { + MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,( "%s: CQ access violation on CQN %06x\n", dev->pdev->name, - eqe->event.cq_err.syndrome == 1 ? 
- "overrun" : "access violation", be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff)); + } if (mlx4_is_master(dev)) { /* TODO: forward only to slave owning the CQ */ ret = mlx4_get_slave_from_resource_id(dev, RES_CQ, eqe->event.cq_err.cqn, &slave); if (!ret) mlx4_slave_event(dev, slave, eqe); - } else + } else { mlx4_cq_event(dev, eq, be32_to_cpu(eqe->event.cq_err.cqn), eqe->type); + } break; case MLX4_EVENT_TYPE_EQ_OVERFLOW: diff -dwup3 -x *svn* -r c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_types.h .\inc\iba\ib_types.h --- c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_types.h Thu Aug 02 13:08:45 2012 +++ .\inc\iba\ib_types.h Thu Jul 26 15:31:14 2012 @@ -9622,6 +9622,7 @@ typedef enum _ib_async_event_t { IB_AE_SM_CHANGE, IB_AE_GID_CHANGE, IB_AE_RESET_4_RMV, + IB_AE_CQ_OVERFLOW, IB_AE_UNKNOWN /* ALWAYS LAST ENUM VALUE */ } ib_async_event_t; diff -dwup3 -x *svn* -r c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\ib_statustext.c .\core\al\ib_statustext.c --- c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\ib_statustext.c Thu May 31 11:22:16 2012 +++ .\core\al\ib_statustext.c Wed May 23 18:26:47 2012 @@ -151,6 +151,7 @@ static const char* const __ib_async_even "IB_AE_SM_CHANGE", "IB_AE_GID_CHANGE", "IB_AE_RESET_4_RMV", + "IB_AE_CQ_OVERFLOW", "IB_AE_UNKNOWN" }; @@ -229,6 +230,7 @@ ib_get_wc_type_str( else return __ib_wc_send_type_str[wc_type]; } + static const char* const __ib_wr_type_str[] = {
ndv2.14.patch
Description: ndv2.14.patch
_______________________________________________ ofw mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw
