Author: davidcs
Date: Tue Mar  6 23:17:56 2018
New Revision: 330556
URL: https://svnweb.freebsd.org/changeset/base/330556

Log:
  MFC r329855
    1. Added support to offline a port if error recovery is unsuccessful.
    2. Sysctls to enable/disable driver_state_dump and error_recovery.
    3. Sysctl to control the delay between hw/fw reinitialization and
       restarting the fastpath.
    4. Stop periodic stats retrieval if interface has IFF_DRV_RUNNING flag off.
    5. Print contents of PEG_HALT_STATUS1 and PEG_HALT_STATUS2 on heartbeat
       failure.
    6. Speed up slowpath shutdown during error recovery.
    7. link_state update using atomic_store.
    8. Added timestamp information on driver state and minidump captures.
    9. Added support for Slowpath event logging.
    10. Added additional failure injection types to simulate failures.

Modified:
  stable/10/sys/dev/qlxgbe/ql_dbg.h
  stable/10/sys/dev/qlxgbe/ql_def.h
  stable/10/sys/dev/qlxgbe/ql_glbl.h
  stable/10/sys/dev/qlxgbe/ql_hw.c
  stable/10/sys/dev/qlxgbe/ql_hw.h
  stable/10/sys/dev/qlxgbe/ql_inline.h
  stable/10/sys/dev/qlxgbe/ql_ioctl.c
  stable/10/sys/dev/qlxgbe/ql_ioctl.h
  stable/10/sys/dev/qlxgbe/ql_isr.c
  stable/10/sys/dev/qlxgbe/ql_misc.c
  stable/10/sys/dev/qlxgbe/ql_os.c
  stable/10/sys/dev/qlxgbe/ql_os.h
  stable/10/sys/dev/qlxgbe/ql_ver.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/qlxgbe/ql_dbg.h
==============================================================================
--- stable/10/sys/dev/qlxgbe/ql_dbg.h   Tue Mar  6 23:12:32 2018        (r330555)
+++ stable/10/sys/dev/qlxgbe/ql_dbg.h   Tue Mar  6 23:17:56 2018        (r330556)
@@ -42,17 +42,21 @@ extern void ql_dump_buf16(qla_host_t *ha, const char *
 extern void ql_dump_buf32(qla_host_t *ha, const char *str, void *dbuf,
                uint32_t len32);
 
-#define INJCT_RX_RXB_INVAL             0x00001
-#define INJCT_RX_MP_NULL               0x00002
-#define INJCT_LRO_RXB_INVAL            0x00003
-#define INJCT_LRO_MP_NULL              0x00004
-#define INJCT_NUM_HNDLE_INVALID                0x00005
-#define INJCT_RDWR_INDREG_FAILURE      0x00006
-#define INJCT_RDWR_OFFCHIPMEM_FAILURE  0x00007
-#define INJCT_MBX_CMD_FAILURE          0x00008
-#define INJCT_HEARTBEAT_FAILURE                0x00009
-#define INJCT_TEMPERATURE_FAILURE      0x0000A
-#define INJCT_M_GETCL_M_GETJCL_FAILURE 0x0000B
+#define INJCT_RX_RXB_INVAL                             0x00001
+#define INJCT_RX_MP_NULL                               0x00002
+#define INJCT_LRO_RXB_INVAL                            0x00003
+#define INJCT_LRO_MP_NULL                              0x00004
+#define INJCT_NUM_HNDLE_INVALID                                0x00005
+#define INJCT_RDWR_INDREG_FAILURE                      0x00006
+#define INJCT_RDWR_OFFCHIPMEM_FAILURE                  0x00007
+#define INJCT_MBX_CMD_FAILURE                          0x00008
+#define INJCT_HEARTBEAT_FAILURE                                0x00009
+#define INJCT_TEMPERATURE_FAILURE                      0x0000A
+#define INJCT_M_GETCL_M_GETJCL_FAILURE                 0x0000B
+#define INJCT_INV_CONT_OPCODE                          0x0000C
+#define INJCT_SGL_RCV_INV_DESC_COUNT                   0x0000D
+#define INJCT_SGL_LRO_INV_DESC_COUNT                   0x0000E
+#define INJCT_PEER_PORT_FAILURE_ERR_RECOVERY           0x0000F
 
 #ifdef QL_DBG
 

Modified: stable/10/sys/dev/qlxgbe/ql_def.h
==============================================================================
--- stable/10/sys/dev/qlxgbe/ql_def.h   Tue Mar  6 23:12:32 2018        (r330555)
+++ stable/10/sys/dev/qlxgbe/ql_def.h   Tue Mar  6 23:17:56 2018        (r330556)
@@ -144,12 +144,12 @@ struct qla_host {
        volatile uint32_t       qla_watchdog_paused;
        volatile uint32_t       qla_initiate_recovery;
        volatile uint32_t       qla_detach_active;
+       volatile uint32_t       offline;
 
        device_t                pci_dev;
 
-       uint16_t                watchdog_ticks;
+       volatile uint16_t       watchdog_ticks;
        uint8_t                 pci_func;
-       uint8_t                 resvd;
 
         /* ioctl related */
         struct cdev             *ioctl_dev;
@@ -182,6 +182,7 @@ struct qla_host {
 
        /* hardware access lock */
 
+       struct mtx              sp_log_lock;
        struct mtx              hw_lock;
        volatile uint32_t       hw_lock_held;
        uint64_t                hw_lock_failed;
@@ -239,6 +240,9 @@ struct qla_host {
        volatile const char     *qla_unlock;
        uint32_t                dbg_level;
        uint32_t                enable_minidump;
+       uint32_t                enable_driverstate_dump;
+       uint32_t                enable_error_recovery;
+       uint32_t                ms_delay_after_init;
 
        uint8_t                 fw_ver_str[32];
 
@@ -272,5 +276,7 @@ typedef struct qla_host qla_host_t;
 #define QL_MAC_CMP(mac1, mac2)    \
        ((((*(uint32_t *) mac1) == (*(uint32_t *) mac2) && \
        (*(uint16_t *)(mac1 + 4)) == (*(uint16_t *)(mac2 + 4)))) ? 0 : 1)
+
+#define QL_INITIATE_RECOVERY(ha) qla_set_error_recovery(ha)
 
 #endif /* #ifndef _QL_DEF_H_ */

Modified: stable/10/sys/dev/qlxgbe/ql_glbl.h
==============================================================================
--- stable/10/sys/dev/qlxgbe/ql_glbl.h  Tue Mar  6 23:12:32 2018        (r330555)
+++ stable/10/sys/dev/qlxgbe/ql_glbl.h  Tue Mar  6 23:17:56 2018        (r330556)
@@ -47,6 +47,7 @@ extern uint32_t ql_rcv_isr(qla_host_t *ha, uint32_t sd
 extern int ql_alloc_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf);
 extern void ql_free_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf);
 extern int ql_get_mbuf(qla_host_t *ha, qla_rx_buf_t *rxb, struct mbuf *nmp);
+extern void qla_set_error_recovery(qla_host_t *ha);
 
 /*
  * from ql_hw.c
@@ -115,5 +116,11 @@ extern unsigned int ql83xx_minidump_len;
 extern void ql_alloc_drvr_state_buffer(qla_host_t *ha);
 extern void ql_free_drvr_state_buffer(qla_host_t *ha);
 extern void ql_capture_drvr_state(qla_host_t *ha);
+extern void ql_sp_log(qla_host_t *ha, uint16_t fmtstr_idx, uint16_t num_params,
+               uint32_t param0, uint32_t param1, uint32_t param2,
+               uint32_t param3, uint32_t param4);
+extern void ql_alloc_sp_log_buffer(qla_host_t *ha);
+extern void ql_free_sp_log_buffer(qla_host_t *ha);
+
 
 #endif /* #ifndef_QL_GLBL_H_ */

Modified: stable/10/sys/dev/qlxgbe/ql_hw.c
==============================================================================
--- stable/10/sys/dev/qlxgbe/ql_hw.c    Tue Mar  6 23:12:32 2018        (r330555)
+++ stable/10/sys/dev/qlxgbe/ql_hw.c    Tue Mar  6 23:17:56 2018        (r330556)
@@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$");
 
 static void qla_del_rcv_cntxt(qla_host_t *ha);
 static int qla_init_rcv_cntxt(qla_host_t *ha);
-static void qla_del_xmt_cntxt(qla_host_t *ha);
+static int qla_del_xmt_cntxt(qla_host_t *ha);
 static int qla_init_xmt_cntxt(qla_host_t *ha);
 static int qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t n_hmbox,
        uint32_t *fw_mbox, uint32_t n_fwmbox, uint32_t no_pause);
@@ -647,11 +647,118 @@ qlnx_add_hw_xmt_stats_sysctls(qla_host_t *ha)
 }
 
 static void
+qlnx_add_hw_mbx_cmpl_stats_sysctls(qla_host_t *ha)
+{
+        struct sysctl_ctx_list  *ctx;
+        struct sysctl_oid_list  *node_children;
+
+        ctx = device_get_sysctl_ctx(ha->pci_dev);
+        node_children = SYSCTL_CHILDREN(device_get_sysctl_tree(ha->pci_dev));
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_lt_200ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[0],
+               "mbx_completion_time_lt_200ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_200ms_400ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[1],
+               "mbx_completion_time_200ms_400ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_400ms_600ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[2],
+               "mbx_completion_time_400ms_600ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_600ms_800ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[3],
+               "mbx_completion_time_600ms_800ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_800ms_1000ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[4],
+               "mbx_completion_time_800ms_1000ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_1000ms_1200ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[5],
+               "mbx_completion_time_1000ms_1200ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_1200ms_1400ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[6],
+               "mbx_completion_time_1200ms_1400ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_1400ms_1600ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[7],
+               "mbx_completion_time_1400ms_1600ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_1600ms_1800ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[8],
+               "mbx_completion_time_1600ms_1800ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_1800ms_2000ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[9],
+               "mbx_completion_time_1800ms_2000ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_2000ms_2200ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[10],
+               "mbx_completion_time_2000ms_2200ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_2200ms_2400ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[11],
+               "mbx_completion_time_2200ms_2400ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_2400ms_2600ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[12],
+               "mbx_completion_time_2400ms_2600ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_2600ms_2800ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[13],
+               "mbx_completion_time_2600ms_2800ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_2800ms_3000ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[14],
+               "mbx_completion_time_2800ms_3000ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_3000ms_4000ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[15],
+               "mbx_completion_time_3000ms_4000ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_time_4000ms_5000ms",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[16],
+               "mbx_completion_time_4000ms_5000ms");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_host_mbx_cntrl_timeout",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[17],
+               "mbx_completion_host_mbx_cntrl_timeout");
+
+       SYSCTL_ADD_QUAD(ctx, node_children,
+               OID_AUTO, "mbx_completion_fw_mbx_cntrl_timeout",
+               CTLFLAG_RD, &ha->hw.mbx_comp_msecs[18],
+               "mbx_completion_fw_mbx_cntrl_timeout");
+       return;
+}
+
+static void
 qlnx_add_hw_stats_sysctls(qla_host_t *ha)
 {
        qlnx_add_hw_mac_stats_sysctls(ha);
        qlnx_add_hw_rcv_stats_sysctls(ha);
        qlnx_add_hw_xmt_stats_sysctls(ha);
+       qlnx_add_hw_mbx_cmpl_stats_sysctls(ha);
 
        return;
 }
@@ -918,6 +1025,30 @@ ql_hw_add_sysctls(qla_host_t *ha)
                "\t Any change requires ifconfig down/up to take effect\n"
                "\t Note that LRO may be turned off/on via ifconfig\n");
 
+        SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
+                SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+                OID_AUTO, "sp_log_index", CTLFLAG_RW, &ha->hw.sp_log_index,
+                ha->hw.sp_log_index, "sp_log_index");
+
+        SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
+                SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+                OID_AUTO, "sp_log_stop", CTLFLAG_RW, &ha->hw.sp_log_stop,
+                ha->hw.sp_log_stop, "sp_log_stop");
+
+        ha->hw.sp_log_stop_events = 0;
+
+        SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
+                SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+                OID_AUTO, "sp_log_stop_events", CTLFLAG_RW,
+               &ha->hw.sp_log_stop_events,
+                ha->hw.sp_log_stop_events, "Slow path event log is stopped"
+               " when OR of the following events occur \n"
+               "\t 0x01 : Heart beat Failure\n"
+               "\t 0x02 : Temperature Failure\n"
+               "\t 0x04 : HW Initialization Failure\n"
+               "\t 0x08 : Interface Initialization Failure\n"
+               "\t 0x10 : Error Recovery Failure\n");
+
        ha->hw.mdump_active = 0;
         SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
                 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
@@ -998,13 +1129,13 @@ ql_hw_link_status(qla_host_t *ha)
                device_printf(ha->pci_dev, "link Down\n");
        }
 
-       if (ha->hw.flags.fduplex) {
+       if (ha->hw.fduplex) {
                device_printf(ha->pci_dev, "Full Duplex\n");
        } else {
                device_printf(ha->pci_dev, "Half Duplex\n");
        }
 
-       if (ha->hw.flags.autoneg) {
+       if (ha->hw.autoneg) {
                device_printf(ha->pci_dev, "Auto Negotiation Enabled\n");
        } else {
                device_printf(ha->pci_dev, "Auto Negotiation Disabled\n");
@@ -1255,19 +1386,39 @@ qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t
        uint32_t i;
        uint32_t data;
        int ret = 0;
+       uint64_t start_usecs;
+       uint64_t end_usecs;
+       uint64_t msecs_200;
 
-       if (QL_ERR_INJECT(ha, INJCT_MBX_CMD_FAILURE)) {
+       ql_sp_log(ha, 0, 5, no_pause, h_mbox[0], h_mbox[1], h_mbox[2], 
h_mbox[3]);
+
+       if (ha->offline || ha->qla_initiate_recovery) {
+               ql_sp_log(ha, 1, 2, ha->offline, ha->qla_initiate_recovery, 0, 
0, 0);
+               goto exit_qla_mbx_cmd;
+       }
+
+       if (((ha->err_inject & 0xFFFF) == INJCT_MBX_CMD_FAILURE) &&
+               (((ha->err_inject & ~0xFFFF) == ((h_mbox[0] & 0xFFFF) << 16))||
+               !(ha->err_inject & ~0xFFFF))) {
                ret = -3;
-               ha->qla_initiate_recovery = 1;
+               QL_INITIATE_RECOVERY(ha);
                goto exit_qla_mbx_cmd;
        }
 
+       start_usecs = qla_get_usec_timestamp();
+
        if (no_pause)
                i = 1000;
        else
                i = Q8_MBX_MSEC_DELAY;
 
        while (i) {
+
+               if (ha->qla_initiate_recovery) {
+                       ql_sp_log(ha, 2, 1, ha->qla_initiate_recovery, 0, 0, 0, 
0);
+                       return (-1);
+               }
+
                data = READ_REG32(ha, Q8_HOST_MBOX_CNTRL);
                if (data == 0)
                        break;
@@ -1282,8 +1433,10 @@ qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t
        if (i == 0) {
                device_printf(ha->pci_dev, "%s: host_mbx_cntrl 0x%08x\n",
                        __func__, data);
+               ql_sp_log(ha, 3, 1, data, 0, 0, 0, 0);
                ret = -1;
-               ha->qla_initiate_recovery = 1;
+               ha->hw.mbx_comp_msecs[(Q8_MBX_COMP_MSECS - 2)]++;
+               QL_INITIATE_RECOVERY(ha);
                goto exit_qla_mbx_cmd;
        }
 
@@ -1297,6 +1450,12 @@ qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t
 
        i = Q8_MBX_MSEC_DELAY;
        while (i) {
+
+               if (ha->qla_initiate_recovery) {
+                       ql_sp_log(ha, 4, 1, ha->qla_initiate_recovery, 0, 0, 0, 
0);
+                       return (-1);
+               }
+
                data = READ_REG32(ha, Q8_FW_MBOX_CNTRL);
 
                if ((data & 0x3) == 1) {
@@ -1314,18 +1473,44 @@ qla_mbx_cmd(qla_host_t *ha, uint32_t *h_mbox, uint32_t
        if (i == 0) {
                device_printf(ha->pci_dev, "%s: fw_mbx_cntrl 0x%08x\n",
                        __func__, data);
+               ql_sp_log(ha, 5, 1, data, 0, 0, 0, 0);
                ret = -2;
-               ha->qla_initiate_recovery = 1;
+               ha->hw.mbx_comp_msecs[(Q8_MBX_COMP_MSECS - 1)]++;
+               QL_INITIATE_RECOVERY(ha);
                goto exit_qla_mbx_cmd;
        }
 
        for (i = 0; i < n_fwmbox; i++) {
+
+               if (ha->qla_initiate_recovery) {
+                       ql_sp_log(ha, 6, 1, ha->qla_initiate_recovery, 0, 0, 0, 
0);
+                       return (-1);
+               }
+
                *fw_mbox++ = READ_REG32(ha, (Q8_FW_MBOX0 + (i << 2)));
        }
 
        WRITE_REG32(ha, Q8_FW_MBOX_CNTRL, 0x0);
        WRITE_REG32(ha, ha->hw.mbx_intr_mask_offset, 0x0);
 
+       end_usecs = qla_get_usec_timestamp();
+
+       if (end_usecs > start_usecs) {
+               msecs_200 = (end_usecs - start_usecs)/(1000 * 200);
+
+               if (msecs_200 < 15) 
+                       ha->hw.mbx_comp_msecs[msecs_200]++;
+               else if (msecs_200 < 20)
+                       ha->hw.mbx_comp_msecs[15]++;
+               else {
+                       device_printf(ha->pci_dev, "%s: [%ld, %ld] %ld\n", 
__func__,
+                               start_usecs, end_usecs, msecs_200);
+                       ha->hw.mbx_comp_msecs[16]++;
+               }
+       }
+       ql_sp_log(ha, 7, 5, fw_mbox[0], fw_mbox[1], fw_mbox[2], fw_mbox[3], 
fw_mbox[4]);
+
+
 exit_qla_mbx_cmd:
        return (ret);
 }
@@ -1401,7 +1586,8 @@ qla_config_intr_cntxt(qla_host_t *ha, uint32_t start_i
        if (qla_mbx_cmd(ha, (uint32_t *)c_intr,
                (sizeof (q80_config_intr_t) >> 2),
                ha->hw.mbox, (sizeof (q80_config_intr_rsp_t) >> 2), 0)) {
-               device_printf(dev, "%s: failed0\n", __func__);
+               device_printf(dev, "%s: %s failed0\n", __func__,
+                       (create ? "create" : "delete"));
                return (-1);
        }
 
@@ -1410,8 +1596,8 @@ qla_config_intr_cntxt(qla_host_t *ha, uint32_t start_i
        err = Q8_MBX_RSP_STATUS(c_intr_rsp->regcnt_status);
 
        if (err) {
-               device_printf(dev, "%s: failed1 [0x%08x, %d]\n", __func__, err,
-                       c_intr_rsp->nentries);
+               device_printf(dev, "%s: %s failed1 [0x%08x, %d]\n", __func__,
+                       (create ? "create" : "delete"), err, 
c_intr_rsp->nentries);
 
                for (i = 0; i < c_intr_rsp->nentries; i++) {
                        device_printf(dev, "%s: [%d]:[0x%x 0x%x 0x%x]\n",
@@ -2015,7 +2201,8 @@ ql_get_stats(qla_host_t *ha)
 
        cmd |= ((ha->pci_func & 0x1) << 16);
 
-       if (ha->qla_watchdog_pause)
+       if (ha->qla_watchdog_pause || (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 
||
+               ha->offline)
                goto ql_get_stats_exit;
 
        if (qla_get_hw_stats(ha, cmd, sizeof (q80_get_stats_rsp_t)) == 0) {
@@ -2032,7 +2219,8 @@ ql_get_stats(qla_host_t *ha)
 //     cmd |= Q8_GET_STATS_CMD_CLEAR;
        cmd |= (ha->hw.rcv_cntxt_id << 16);
 
-       if (ha->qla_watchdog_pause)
+       if (ha->qla_watchdog_pause || (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 
||
+               ha->offline)
                goto ql_get_stats_exit;
 
        if (qla_get_hw_stats(ha, cmd, sizeof (q80_get_stats_rsp_t)) == 0) {
@@ -2043,13 +2231,18 @@ ql_get_stats(qla_host_t *ha)
                        __func__, ha->hw.mbox[0]);
        }
 
-       if (ha->qla_watchdog_pause)
+       if (ha->qla_watchdog_pause || (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 
||
+               ha->offline)
                goto ql_get_stats_exit;
        /*
         * Get XMT Statistics
         */
-       for (i = 0 ; ((i < ha->hw.num_tx_rings) && (!ha->qla_watchdog_pause));
-               i++) {
+       for (i = 0 ; (i < ha->hw.num_tx_rings); i++) {
+               if (ha->qla_watchdog_pause ||
+                       (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
+                       ha->offline)
+                       goto ql_get_stats_exit;
+
                cmd = Q8_GET_STATS_CMD_XMT | Q8_GET_STATS_CMD_TYPE_CNTXT;
 //             cmd |= Q8_GET_STATS_CMD_CLEAR;
                cmd |= (ha->hw.tx_cntxt[i].tx_cntxt_id << 16);
@@ -2679,7 +2872,8 @@ ql_del_hw_if(qla_host_t *ha)
 
        qla_del_rcv_cntxt(ha);
 
-       qla_del_xmt_cntxt(ha);
+       if(qla_del_xmt_cntxt(ha))
+               goto ql_del_hw_if_exit;
 
        if (ha->hw.flags.init_intr_cnxt) {
                for (i = 0; i < ha->hw.num_sds_rings; ) {
@@ -2688,14 +2882,17 @@ ql_del_hw_if(qla_host_t *ha)
                                num_msix = Q8_MAX_INTR_VECTORS;
                        else
                                num_msix = ha->hw.num_sds_rings - i;
-                       qla_config_intr_cntxt(ha, i, num_msix, 0);
 
+                       if (qla_config_intr_cntxt(ha, i, num_msix, 0))
+                               break;
+
                        i += num_msix;
                }
 
                ha->hw.flags.init_intr_cnxt = 0;
        }
 
+ql_del_hw_if_exit:
        if (ha->hw.enable_soft_lro) {
                qla_drain_soft_lro(ha);
                qla_free_soft_lro(ha);
@@ -3328,19 +3525,22 @@ qla_del_xmt_cntxt_i(qla_host_t *ha, uint32_t txr_idx)
 
        return (0);
 }
-static void
+static int
 qla_del_xmt_cntxt(qla_host_t *ha)
 {
        uint32_t i;
+       int ret = 0;
 
        if (!ha->hw.flags.init_tx_cnxt)
-               return;
+               return (ret);
 
        for (i = 0; i < ha->hw.num_tx_rings; i++) {
-               if (qla_del_xmt_cntxt_i(ha, i))
+               if ((ret = qla_del_xmt_cntxt_i(ha, i)) != 0)
                        break;
        }
        ha->hw.flags.init_tx_cnxt = 0;
+
+       return (ret);
 }
 
 static int
@@ -3350,8 +3550,10 @@ qla_init_xmt_cntxt(qla_host_t *ha)
 
        for (i = 0; i < ha->hw.num_tx_rings; i++) {
                if (qla_init_xmt_cntxt_i(ha, i) != 0) {
-                       for (j = 0; j < i; j++)
-                               qla_del_xmt_cntxt_i(ha, j);
+                       for (j = 0; j < i; j++) {
+                               if (qla_del_xmt_cntxt_i(ha, j))
+                                       break;
+                       }
                        return (-1);
                }
        }
@@ -3627,22 +3829,23 @@ ql_hw_tx_done_locked(qla_host_t *ha, uint32_t txr_idx)
 void
 ql_update_link_state(qla_host_t *ha)
 {
-       uint32_t link_state;
+       uint32_t link_state = 0;
        uint32_t prev_link_state;
 
-       if (!(ha->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
-               ha->hw.link_up = 0;
-               return;
-       }
-       link_state = READ_REG32(ha, Q8_LINK_STATE);
-
        prev_link_state =  ha->hw.link_up;
 
-       if (ha->pci_func == 0) 
-               ha->hw.link_up = (((link_state & 0xF) == 1)? 1 : 0);
-       else
-               ha->hw.link_up = ((((link_state >> 4)& 0xF) == 1)? 1 : 0);
+       if (ha->ifp->if_drv_flags & IFF_DRV_RUNNING) {
+               link_state = READ_REG32(ha, Q8_LINK_STATE);
 
+               if (ha->pci_func == 0) {
+                       link_state = (((link_state & 0xF) == 1)? 1 : 0);
+               } else {
+                       link_state = ((((link_state >> 4)& 0xF) == 1)? 1 : 0);
+               }
+       }
+
+       atomic_store_rel_8(&ha->hw.link_up, (uint8_t)link_state);
+
        if (prev_link_state !=  ha->hw.link_up) {
                if (ha->hw.link_up) {
                        if_link_state_change(ha->ifp, LINK_STATE_UP);
@@ -3669,8 +3872,14 @@ ql_hw_check_health(qla_host_t *ha)
 
        if (((val & 0xFFFF) == 2) || ((val & 0xFFFF) == 3) ||
                (QL_ERR_INJECT(ha, INJCT_TEMPERATURE_FAILURE))) {
-               device_printf(ha->pci_dev, "%s: Temperature Alert [0x%08x]\n",
-                       __func__, val);
+               device_printf(ha->pci_dev, "%s: Temperature Alert"
+                       " at ts_usecs %ld ts_reg = 0x%08x\n",
+                       __func__, qla_get_usec_timestamp(), val);
+
+               if (ha->hw.sp_log_stop_events & Q8_SP_LOG_STOP_TEMP_FAILURE)
+                       ha->hw.sp_log_stop = -1;
+
+               QL_INITIATE_RECOVERY(ha);
                return -1;
        }
 
@@ -3691,10 +3900,26 @@ ql_hw_check_health(qla_host_t *ha)
                        __func__, val);
        if (ha->hw.hbeat_failure < 2) /* we ignore the first failure */
                return 0;
-       else 
-               device_printf(ha->pci_dev, "%s: Heartbeat Failue [0x%08x]\n",
-                       __func__, val);
+       else {
+               uint32_t peg_halt_status1;
+               uint32_t peg_halt_status2;
 
+               peg_halt_status1 = READ_REG32(ha, Q8_PEG_HALT_STATUS1);
+               peg_halt_status2 = READ_REG32(ha, Q8_PEG_HALT_STATUS2);
+
+               device_printf(ha->pci_dev,
+                       "%s: Heartbeat Failue at ts_usecs = %ld "
+                       "fw_heart_beat = 0x%08x "
+                       "peg_halt_status1 = 0x%08x "
+                       "peg_halt_status2 = 0x%08x\n",
+                       __func__, qla_get_usec_timestamp(), val,
+                       peg_halt_status1, peg_halt_status2);
+
+               if (ha->hw.sp_log_stop_events & Q8_SP_LOG_STOP_HBEAT_FAILURE)
+                       ha->hw.sp_log_stop = -1;
+       }
+       QL_INITIATE_RECOVERY(ha);
+
        return -1;
 }
 
@@ -4429,8 +4654,8 @@ ql_minidump(qla_host_t *ha)
 
        if (ha->hw.mdump_done)
                return;
-
-               ha->hw.mdump_start_seq_index = ql_stop_sequence(ha);
+       ha->hw.mdump_usec_ts = qla_get_usec_timestamp();
+       ha->hw.mdump_start_seq_index = ql_stop_sequence(ha);
 
        bzero(ha->hw.mdump_buffer, ha->hw.mdump_buffer_size);
        bzero(ha->hw.mdump_template, ha->hw.mdump_template_size);

Modified: stable/10/sys/dev/qlxgbe/ql_hw.h
==============================================================================
--- stable/10/sys/dev/qlxgbe/ql_hw.h    Tue Mar  6 23:12:32 2018        (r330555)
+++ stable/10/sys/dev/qlxgbe/ql_hw.h    Tue Mar  6 23:17:56 2018        (r330556)
@@ -1600,26 +1600,26 @@ typedef struct _qla_hw {
                uint32_t
                        unicast_mac     :1,
                        bcast_mac       :1,
-                       loopback_mode   :2,
                        init_tx_cnxt    :1,
                        init_rx_cnxt    :1,
                        init_intr_cnxt  :1,
-                       fduplex         :1,
-                       autoneg         :1,
                        fdt_valid       :1;
        } flags;
 
 
-       uint16_t        link_speed;
-       uint16_t        cable_length;
-       uint32_t        cable_oui;
-       uint8_t         link_up;
-       uint8_t         module_type;
-       uint8_t         link_faults;
+       volatile uint16_t       link_speed;
+       volatile uint16_t       cable_length;
+       volatile uint32_t       cable_oui;
+       volatile uint8_t        link_up;
+       volatile uint8_t        module_type;
+       volatile uint8_t        link_faults;
+       volatile uint8_t        loopback_mode;
+       volatile uint8_t        fduplex;
+       volatile uint8_t        autoneg;
 
-       uint8_t         mac_rcv_mode;
+       volatile uint8_t        mac_rcv_mode;
 
-       uint32_t        max_mtu;
+       volatile uint32_t       max_mtu;
 
        uint8_t         mac_addr[ETHER_ADDR_LEN];
 
@@ -1703,9 +1703,25 @@ typedef struct _qla_hw {
        uint32_t        mdump_buffer_size;
        void            *mdump_template;
        uint32_t        mdump_template_size;
+       uint64_t        mdump_usec_ts;
 
+#define Q8_MBX_COMP_MSECS      (19)
+       uint64_t        mbx_comp_msecs[Q8_MBX_COMP_MSECS];
        /* driver state related */
        void            *drvr_state;
+
+       /* slow path trace */
+       uint32_t        sp_log_stop_events;
+#define Q8_SP_LOG_STOP_HBEAT_FAILURE           0x001
+#define Q8_SP_LOG_STOP_TEMP_FAILURE            0x002
+#define Q8_SP_LOG_STOP_HW_INIT_FAILURE         0x004
+#define Q8_SP_LOG_STOP_IF_START_FAILURE                0x008
+#define Q8_SP_LOG_STOP_ERR_RECOVERY_FAILURE    0x010
+
+       uint32_t        sp_log_stop;
+       uint32_t        sp_log_index;
+       uint32_t        sp_log_num_entries;
+       void            *sp_log;
 } qla_hw_t;
 
 #define QL_UPDATE_RDS_PRODUCER_INDEX(ha, prod_reg, val) \

Modified: stable/10/sys/dev/qlxgbe/ql_inline.h
==============================================================================
--- stable/10/sys/dev/qlxgbe/ql_inline.h        Tue Mar  6 23:12:32 2018        (r330555)
+++ stable/10/sys/dev/qlxgbe/ql_inline.h        Tue Mar  6 23:17:56 2018        (r330556)
@@ -166,7 +166,7 @@ qla_lock(qla_host_t *ha, const char *str, uint32_t tim
        while (1) {
                mtx_lock(&ha->hw_lock);
 
-               if (ha->qla_detach_active) {
+               if (ha->qla_detach_active || ha->offline) {
                        mtx_unlock(&ha->hw_lock);
                        break;
                }
@@ -191,7 +191,10 @@ qla_lock(qla_host_t *ha, const char *str, uint32_t tim
                }
        }
 
-       //device_printf(ha->pci_dev, "%s: %s ret = %d\n", __func__, str,ret);
+//     if (!ha->enable_error_recovery)
+//             device_printf(ha->pci_dev, "%s: %s ret = %d\n", __func__,
+//                     str,ret);
+
        return (ret);
 }
 
@@ -202,7 +205,9 @@ qla_unlock(qla_host_t *ha, const char *str)
        ha->hw_lock_held = 0;
        ha->qla_unlock = str;
        mtx_unlock(&ha->hw_lock);
-       //device_printf(ha->pci_dev, "%s: %s\n", __func__, str);
+
+//     if (!ha->enable_error_recovery)
+//             device_printf(ha->pci_dev, "%s: %s\n", __func__, str);
 
        return;
 }

Modified: stable/10/sys/dev/qlxgbe/ql_ioctl.c
==============================================================================
--- stable/10/sys/dev/qlxgbe/ql_ioctl.c Tue Mar  6 23:12:32 2018        (r330555)
+++ stable/10/sys/dev/qlxgbe/ql_ioctl.c Tue Mar  6 23:17:56 2018        (r330556)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 #include "ql_ver.h"
 #include "ql_dbg.h"
 
+static int ql_slowpath_log(qla_host_t *ha, qla_sp_log_t *log);
 static int ql_drvr_state(qla_host_t *ha, qla_driver_state_t *drvr_state);
 static uint32_t ql_drvr_state_size(qla_host_t *ha);
 static int ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
@@ -224,6 +225,7 @@ ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data, 
        case QLA_RD_FW_DUMP:
 
                if (ha->hw.mdump_init == 0) {
+                       device_printf(pci_dev, "%s: minidump not 
initialized\n", __func__);
                        rval = EINVAL;
                        break;
                }
@@ -233,45 +235,85 @@ ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data, 
                if ((fw_dump->minidump == NULL) ||
                        (fw_dump->minidump_size != (ha->hw.mdump_buffer_size +
                                ha->hw.mdump_template_size))) {
+                       device_printf(pci_dev,
+                               "%s: minidump buffer [%p] size = [%d, %d] invalid\n", __func__,
+                               fw_dump->minidump, fw_dump->minidump_size,
+                               (ha->hw.mdump_buffer_size + ha->hw.mdump_template_size));
                        rval = EINVAL;
                        break;
                }
 
-               if (QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, 0) == 0) {
-                       if (!ha->hw.mdump_done)
-                               ha->qla_initiate_recovery = 1;
-                       QLA_UNLOCK(ha, __func__);
-               } else {
+               if ((ha->pci_func & 0x1)) {
+                       device_printf(pci_dev, "%s: mindump allowed only on Port0\n", __func__);
                        rval = ENXIO;
                        break;
                }
+
+               fw_dump->saved = 1;
+
+               if (ha->offline) {
+
+                       if (ha->enable_minidump)
+                               ql_minidump(ha);
+
+                       fw_dump->saved = 0;
+                       fw_dump->usec_ts = ha->hw.mdump_usec_ts;
+
+                       if (!ha->hw.mdump_done) {
+                               device_printf(pci_dev,
+                                       "%s: port offline minidump failed\n", __func__);
+                               rval = ENXIO;
+                               break;
+                       }
+               } else {
+
+                       if (QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, 0) == 0) {
+                               if (!ha->hw.mdump_done) {
+                                       fw_dump->saved = 0;
+                                       QL_INITIATE_RECOVERY(ha);
+                                       device_printf(pci_dev, "%s: recovery initiated "
+                                               " to trigger minidump\n",
+                                               __func__);
+                               }
+                               QLA_UNLOCK(ha, __func__);
+                       } else {
+                               device_printf(pci_dev, "%s: QLA_LOCK() failed0\n", __func__);
+                               rval = ENXIO;
+                               break;
+                       }
        
 #define QLNX_DUMP_WAIT_SECS    30
 
-               count = QLNX_DUMP_WAIT_SECS * 1000;
+                       count = QLNX_DUMP_WAIT_SECS * 1000;
 
-               while (count) {
-                       if (ha->hw.mdump_done)
-                               break;
-                       qla_mdelay(__func__, 100);
-                       count -= 100;
-               }
+                       while (count) {
+                               if (ha->hw.mdump_done)
+                                       break;
+                               qla_mdelay(__func__, 100);
+                               count -= 100;
+                       }
 
-               if (!ha->hw.mdump_done) {
-                       rval = ENXIO;
-                       break;
-               }
+                       if (!ha->hw.mdump_done) {
+                               device_printf(pci_dev,
+                                       "%s: port not offline minidump failed\n", __func__);
+                               rval = ENXIO;
+                               break;
+                       }
+                       fw_dump->usec_ts = ha->hw.mdump_usec_ts;
                        
-               if (QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, 0) == 0) {
-                       ha->hw.mdump_done = 0;
-                       QLA_UNLOCK(ha, __func__);
-               } else {
-                       rval = ENXIO;
-                       break;
+                       if (QLA_LOCK(ha, __func__, QLA_LOCK_DEFAULT_MS_TIMEOUT, 0) == 0) {
+                               ha->hw.mdump_done = 0;
+                               QLA_UNLOCK(ha, __func__);
+                       } else {
+                               device_printf(pci_dev, "%s: QLA_LOCK() failed1\n", __func__);
+                               rval = ENXIO;
+                               break;
+                       }
                }
 
                if ((rval = copyout(ha->hw.mdump_template,
                        fw_dump->minidump, ha->hw.mdump_template_size))) {
+                       device_printf(pci_dev, "%s: template copyout failed\n", __func__);
                        rval = ENXIO;
                        break;
                }
@@ -279,14 +321,20 @@ ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data, 
                if ((rval = copyout(ha->hw.mdump_buffer,
                                ((uint8_t *)fw_dump->minidump +
                                        ha->hw.mdump_template_size),
-                               ha->hw.mdump_buffer_size)))
+                               ha->hw.mdump_buffer_size))) {
+                       device_printf(pci_dev, "%s: minidump copyout failed\n", __func__);
                        rval = ENXIO;
+               }
                break;
 
        case QLA_RD_DRVR_STATE:
                rval = ql_drvr_state(ha, (qla_driver_state_t *)data);
                break;
 
+       case QLA_RD_SLOWPATH_LOG:
+               rval = ql_slowpath_log(ha, (qla_sp_log_t *)data);
+               break;
+
        case QLA_RD_PCI_IDS:
                pci_ids = (qla_rd_pci_ids_t *)data;
                pci_ids->ven_id = pci_get_vendor(pci_dev);
@@ -304,12 +352,12 @@ ql_eioctl(struct cdev *dev, u_long cmd, caddr_t data, 
 }
 
 
+
 static int
 ql_drvr_state(qla_host_t *ha, qla_driver_state_t *state)
 {
        int rval = 0;
        uint32_t drvr_state_size;
-       qla_drvr_state_hdr_t *hdr;
 
        drvr_state_size = ql_drvr_state_size(ha);
 
@@ -324,11 +372,8 @@ ql_drvr_state(qla_host_t *ha, qla_driver_state_t *stat
        if (ha->hw.drvr_state == NULL)
                return (ENOMEM);
 
-       hdr = ha->hw.drvr_state;
+       ql_capture_drvr_state(ha);
 
-       if (!hdr->drvr_version_major)
-               ql_capture_drvr_state(ha);
-
        rval = copyout(ha->hw.drvr_state, state->buffer, drvr_state_size);
 
        bzero(ha->hw.drvr_state, drvr_state_size);
@@ -416,22 +461,26 @@ ql_capture_drvr_state(qla_host_t *ha)
 {
        uint8_t *state_buffer;
        uint8_t *ptr;
-       uint32_t drvr_state_size;
        qla_drvr_state_hdr_t *hdr;
        uint32_t size;
        int i;
 
-       drvr_state_size = ql_drvr_state_size(ha);
-
        state_buffer =  ha->hw.drvr_state;
 
        if (state_buffer == NULL)
                return;
-       
-       bzero(state_buffer, drvr_state_size);
 
        hdr = (qla_drvr_state_hdr_t *)state_buffer;
+       
+       hdr->saved = 0;
 
+       if (hdr->drvr_version_major) {
+               hdr->saved = 1;
+               return;
+       }
+
+       hdr->usec_ts = qla_get_usec_timestamp();
+
        hdr->drvr_version_major = QLA_VERSION_MAJOR;
        hdr->drvr_version_minor = QLA_VERSION_MINOR;
        hdr->drvr_version_build = QLA_VERSION_BUILD;
@@ -512,6 +561,9 @@ ql_alloc_drvr_state_buffer(qla_host_t *ha)
 
        ha->hw.drvr_state =  malloc(drvr_state_size, M_QLA83XXBUF, M_NOWAIT);   
 
+       if (ha->hw.drvr_state != NULL)
+               bzero(ha->hw.drvr_state, drvr_state_size);
+
        return;
 }
 
@@ -521,5 +573,95 @@ ql_free_drvr_state_buffer(qla_host_t *ha)
        if (ha->hw.drvr_state != NULL)
                free(ha->hw.drvr_state, M_QLA83XXBUF);
        return;
+}
+
+void
+ql_sp_log(qla_host_t *ha, uint16_t fmtstr_idx, uint16_t num_params,
+       uint32_t param0, uint32_t param1, uint32_t param2, uint32_t param3,
+       uint32_t param4)
+{
+       qla_sp_log_entry_t *sp_e, *sp_log;
+
+       if (((sp_log = ha->hw.sp_log) == NULL) || ha->hw.sp_log_stop)
+               return;
+
+       mtx_lock(&ha->sp_log_lock);
+
+       sp_e = &sp_log[ha->hw.sp_log_index];
+
+       bzero(sp_e, sizeof (qla_sp_log_entry_t));
+
+       sp_e->fmtstr_idx = fmtstr_idx;
+       sp_e->num_params = num_params;
+
+       sp_e->usec_ts = qla_get_usec_timestamp();
+
+       sp_e->params[0] = param0;
+       sp_e->params[1] = param1;
+       sp_e->params[2] = param2;
+       sp_e->params[3] = param3;
+       sp_e->params[4] = param4;
+
+       ha->hw.sp_log_index = (ha->hw.sp_log_index + 1) & (NUM_LOG_ENTRIES - 1);
+
+       if (ha->hw.sp_log_num_entries < NUM_LOG_ENTRIES)
+               ha->hw.sp_log_num_entries++;
+
+       mtx_unlock(&ha->sp_log_lock);
+
+       return;
+}
+
+void
+ql_alloc_sp_log_buffer(qla_host_t *ha)
+{
+       uint32_t size;
+
+       size = (sizeof(qla_sp_log_entry_t)) * NUM_LOG_ENTRIES;
+
+       ha->hw.sp_log =  malloc(size, M_QLA83XXBUF, M_NOWAIT);  
+
+       if (ha->hw.sp_log != NULL)
+               bzero(ha->hw.sp_log, size);
+
+       ha->hw.sp_log_index = 0;
+       ha->hw.sp_log_num_entries = 0;
+
+       return;
+}
+
+void
+ql_free_sp_log_buffer(qla_host_t *ha)
+{
+       if (ha->hw.sp_log != NULL)
+               free(ha->hw.sp_log, M_QLA83XXBUF);
+       return;
+}
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to