Author: gallatin
Date: Mon Sep 28 23:48:16 2009
New Revision: 197607
URL: http://svn.freebsd.org/changeset/base/197607

Log:
  MFC 197395: Improve mxge watchdog routine's ability to reliably reset a failed NIC
  
  Approved by: re (kib)

Modified:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/mxge/if_mxge.c
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/sys/dev/mxge/if_mxge.c
==============================================================================
--- stable/8/sys/dev/mxge/if_mxge.c     Mon Sep 28 22:41:28 2009        (r197606)
+++ stable/8/sys/dev/mxge/if_mxge.c     Mon Sep 28 23:48:16 2009        (r197607)
@@ -143,7 +143,7 @@ MODULE_DEPEND(mxge, zlib, 1, 1, 1);
 
 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
-static int mxge_close(mxge_softc_t *sc);
+static int mxge_close(mxge_softc_t *sc, int down);
 static int mxge_open(mxge_softc_t *sc);
 static void mxge_tick(void *arg);
 
@@ -1305,8 +1305,7 @@ mxge_reset(mxge_softc_t *sc, int interru
                ss->lro_queued = 0;
                ss->lro_flushed = 0;
                if (ss->fw_stats != NULL) {
-                       ss->fw_stats->valid = 0;
-                       ss->fw_stats->send_done_count = 0;
+                       bzero(ss->fw_stats, sizeof *ss->fw_stats);
                }
        }
        sc->rdma_tags_available = 15;
@@ -1379,7 +1378,7 @@ mxge_change_lro_locked(mxge_softc_t *sc,
                ifp->if_capenable |= IFCAP_LRO;
        sc->lro_cnt = lro_cnt;
        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-               mxge_close(sc);
+               mxge_close(sc, 0);
                err = mxge_open(sc);
        }
        return err;
@@ -1495,6 +1494,10 @@ mxge_add_sysctls(mxge_softc_t *sc)
                       "read_write_dma_MBs",
                       CTLFLAG_RD, &sc->read_write_dma,
                       0, "DMA concurrent Read/Write speed in MB/s");
+       SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
+                      "watchdog_resets",
+                      CTLFLAG_RD, &sc->watchdog_resets,
+                      0, "Number of times NIC was reset");
 
 
        /* performance related tunables */
@@ -3600,7 +3603,7 @@ abort:
 }
 
 static int
-mxge_close(mxge_softc_t *sc)
+mxge_close(mxge_softc_t *sc, int down)
 {
        mxge_cmd_t cmd;
        int err, old_down_cnt;
@@ -3617,21 +3620,23 @@ mxge_close(mxge_softc_t *sc)
        }
 #endif
        sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-       old_down_cnt = sc->down_cnt;
-       wmb();
-       err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
-       if (err) {
-               device_printf(sc->dev, "Couldn't bring down link\n");
-       }
-       if (old_down_cnt == sc->down_cnt) {
-               /* wait for down irq */
-               DELAY(10 * sc->intr_coal_delay);
-       }
-       wmb();
-       if (old_down_cnt == sc->down_cnt) {
-               device_printf(sc->dev, "never got down irq\n");
+       if (!down) {
+               old_down_cnt = sc->down_cnt;
+               wmb();
+               err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
+               if (err) {
+                       device_printf(sc->dev,
+                                     "Couldn't bring down link\n");
+               }
+               if (old_down_cnt == sc->down_cnt) {
+                       /* wait for down irq */
+                       DELAY(10 * sc->intr_coal_delay);
+               }
+               wmb();
+               if (old_down_cnt == sc->down_cnt) {
+                       device_printf(sc->dev, "never got down irq\n");
+               }
        }
-
        mxge_free_mbufs(sc);
 
        return 0;
@@ -3684,8 +3689,9 @@ static int
 mxge_watchdog_reset(mxge_softc_t *sc, int slice)
 {
        struct pci_devinfo *dinfo;
+       struct mxge_slice_state *ss;
        mxge_tx_ring_t *tx;
-       int err;
+       int err, running, s, num_tx_slices = 1;
        uint32_t reboot;
        uint16_t cmd;
 
@@ -3719,6 +3725,30 @@ mxge_watchdog_reset(mxge_softc_t *sc, in
                reboot = mxge_read_reboot(sc);
                device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
                              reboot);
+               running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
+               if (running) {
+
+                       /* 
+                        * quiesce NIC so that TX routines will not try to
+                        * xmit after restoration of BAR
+                        */
+
+                       /* Mark the link as down */
+                       if (sc->link_state) {
+                               sc->link_state = 0;
+                               if_link_state_change(sc->ifp,
+                                                    LINK_STATE_DOWN);
+                       }
+#ifdef IFNET_BUF_RING
+                       num_tx_slices = sc->num_slices;
+#endif
+                       /* grab all TX locks to ensure no tx  */
+                       for (s = 0; s < num_tx_slices; s++) {
+                               ss = &sc->ss[s];
+                               mtx_lock(&ss->tx.mtx);
+                       }
+                       mxge_close(sc, 1);
+               }
                /* restore PCI configuration space */
                dinfo = device_get_ivars(sc->dev);
                pci_cfg_restore(sc->dev, dinfo);
@@ -3726,10 +3756,22 @@ mxge_watchdog_reset(mxge_softc_t *sc, in
                /* and redo any changes we made to our config space */
                mxge_setup_cfg_space(sc);
 
-               if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
-                       mxge_close(sc);
-                       err = mxge_open(sc);
+               /* reload f/w */
+               err = mxge_load_firmware(sc, 0);
+               if (err) {
+                       device_printf(sc->dev,
+                                     "Unable to re-load f/w\n");
                }
+               if (running) {
+                       if (!err)
+                               err = mxge_open(sc);
+                       /* release all TX locks */
+                       for (s = 0; s < num_tx_slices; s++) {
+                               ss = &sc->ss[s];
+                               mtx_unlock(&ss->tx.mtx);
+                       }
+               }
+               sc->watchdog_resets++;
        } else {
                tx = &sc->ss[slice].tx;
                device_printf(sc->dev,
@@ -3745,6 +3787,9 @@ mxge_watchdog_reset(mxge_softc_t *sc, in
                              be32toh(sc->ss->fw_stats->send_done_count));
                device_printf(sc->dev, "not resetting\n");
        }
+       if (err)
+               device_printf(sc->dev, "watchdog reset failed\n");
+
        return (err);
 }
 
@@ -3860,11 +3905,11 @@ mxge_change_mtu(mxge_softc_t *sc, int mt
        old_mtu = ifp->if_mtu;
        ifp->if_mtu = mtu;
        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-               mxge_close(sc);
+               mxge_close(sc, 0);
                err = mxge_open(sc);
                if (err != 0) {
                        ifp->if_mtu = old_mtu;
-                       mxge_close(sc);
+                       mxge_close(sc, 0);
                        (void) mxge_open(sc);
                }
        }
@@ -3922,7 +3967,7 @@ mxge_ioctl(struct ifnet *ifp, u_long com
                        }
                } else {
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-                               mxge_close(sc);
+                               mxge_close(sc, 0);
                        }
                }
                mtx_unlock(&sc->driver_mtx);
@@ -4645,7 +4690,7 @@ mxge_detach(device_t dev)
        mtx_lock(&sc->driver_mtx);
        sc->dying = 1;
        if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
-               mxge_close(sc);
+               mxge_close(sc, 0);
        mtx_unlock(&sc->driver_mtx);
        ether_ifdetach(sc->ifp);
        callout_drain(&sc->co_hdl);
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to