Author: gallatin
Date: Tue Oct 20 18:58:28 2009
New Revision: 198303
URL: http://svn.freebsd.org/changeset/base/198303

Log:
  Make mxge do a better job of recovering from NIC h/w faults
  by checking PCI config space when the NIC is not
  transmitting.  Previously, a h/w fault would not have been
  detected if the NIC was down or handling an RX-only
  workload.

Modified:
  head/sys/dev/mxge/if_mxge.c

Modified: head/sys/dev/mxge/if_mxge.c
==============================================================================
--- head/sys/dev/mxge/if_mxge.c Tue Oct 20 18:54:51 2009        (r198302)
+++ head/sys/dev/mxge/if_mxge.c Tue Oct 20 18:58:28 2009        (r198303)
@@ -3640,7 +3640,6 @@ mxge_open(mxge_softc_t *sc)
 #endif
        sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
        sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-       callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
 
        return 0;
 
@@ -3661,7 +3660,6 @@ mxge_close(mxge_softc_t *sc, int down)
        int slice;
 #endif
 
-       callout_stop(&sc->co_hdl);
 #ifdef IFNET_BUF_RING
        for (slice = 0; slice < sc->num_slices; slice++) {
                ss = &sc->ss[slice];
@@ -3836,9 +3834,9 @@ mxge_watchdog_reset(mxge_softc_t *sc)
        if (err) {
                device_printf(sc->dev, "watchdog reset failed\n");
        } else {
-               if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
-                       callout_reset(&sc->co_hdl, mxge_ticks,
-                                     mxge_tick, sc);
+               if (sc->dying == 2)
+                       sc->dying = 0;
+               callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
        }
 }
 
@@ -3909,10 +3907,11 @@ mxge_watchdog(mxge_softc_t *sc)
        return (err);
 }
 
-static void
+static u_long
 mxge_update_stats(mxge_softc_t *sc)
 {
        struct mxge_slice_state *ss;
+       u_long pkts = 0;
        u_long ipackets = 0;
        u_long opackets = 0;
 #ifdef IFNET_BUF_RING
@@ -3934,6 +3933,8 @@ mxge_update_stats(mxge_softc_t *sc)
 #endif
                oerrors += ss->oerrors;
        }
+       pkts = (ipackets - sc->ifp->if_ipackets);
+       pkts += (opackets - sc->ifp->if_opackets);
        sc->ifp->if_ipackets = ipackets;
        sc->ifp->if_opackets = opackets;
 #ifdef IFNET_BUF_RING
@@ -3942,23 +3943,45 @@ mxge_update_stats(mxge_softc_t *sc)
        sc->ifp->if_snd.ifq_drops = odrops;
 #endif
        sc->ifp->if_oerrors = oerrors;
+       return pkts;
 }
 
 static void
 mxge_tick(void *arg)
 {
        mxge_softc_t *sc = arg;
+       u_long pkts = 0;
        int err = 0;
+       int running, ticks;
+       uint16_t cmd;
 
-       /* aggregate stats from different slices */
-       mxge_update_stats(sc);
-       if (!sc->watchdog_countdown) {
-               err = mxge_watchdog(sc);
-               sc->watchdog_countdown = 4;
+       ticks = mxge_ticks;
+       mtx_lock(&sc->driver_mtx);
+       running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
+       mtx_unlock(&sc->driver_mtx);
+       if (running) {
+               /* aggregate stats from different slices */
+               pkts = mxge_update_stats(sc);
+               if (!sc->watchdog_countdown) {
+                       err = mxge_watchdog(sc);
+                       sc->watchdog_countdown = 4;
+               }
+               sc->watchdog_countdown--;
+       }
+       if (pkts == 0) {
+               /* ensure NIC did not suffer h/w fault while idle */
+               cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);                
+               if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
+                       sc->dying = 2;
+                       taskqueue_enqueue(sc->tq, &sc->watchdog_task);
+                       err = ENXIO;
+               }
+               /* look less often if NIC is idle */
+               ticks *= 4;
        }
-       sc->watchdog_countdown--;
+
        if (err == 0)
-               callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
+               callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
 
 }
 
@@ -4747,6 +4770,7 @@ mxge_attach(device_t dev)
        ifp->if_transmit = mxge_transmit;
        ifp->if_qflush = mxge_qflush;
 #endif
+       callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
        return 0;
 
 abort_with_rings:
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to