Hi Stefan, On 14/08/2017, at 7:07 PM, Stefan Sperling wrote:
> This diff makes iwm_stop() always run in a process context. > I want iwm_stop() to be able to sleep so that it can wait for asynchronous > driver tasks, and perhaps even wait for firmware commands, in the future. > > If the interrupt handler detects a fatal condition, instead of calling > iwm_stop() directly, defer to the init task. The init task looks at flags > to understand what happened and restarts or stops the device as appropriate. > > I found that toggling the RF kill switch can trigger a fatal firmware error. > Hence I am letting the interrupt handler check RF kill before checking for > fatal firmware error. Provides better error reporting when the kill switch > is used. > > During suspend, bring the device down during the QUIESCE stage which > is allowed to sleep. > > dhclient/down/scan in a loop still works (as far as it always has, with > various errors reported in dmesg). Suspend/resume still works. I've tested mine with a 'while true; do ifconfig down; ifconfig up; done' concurrent with a continuous kernel build. An hour later everything still worked. And I did see some iwm0: fatal firmware error iwm0: failed to update MAC etc My computestick lacks an RF kill button to test, though. One simplification below. ok procter@ best, Richard. iwm0 at pci1 dev 0 function 0 "Intel Dual Band Wireless AC 7265" rev 0x69, msi iwm0: hw rev 0x210, fw ver 16.242414.0, address xx:xx:xx:xx:xx:xx > ok? > > Index: if_iwm.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v > retrieving revision 1.211 > diff -u -p -r1.211 if_iwm.c > --- if_iwm.c 13 Aug 2017 18:08:03 -0000 1.211 > +++ if_iwm.c 14 Aug 2017 06:44:59 -0000 > @@ -6517,6 +6517,7 @@ iwm_stop(struct ifnet *ifp) > sc->sc_flags &= ~IWM_FLAG_BINDING_ACTIVE; > sc->sc_flags &= ~IWM_FLAG_STA_ACTIVE; > sc->sc_flags &= ~IWM_FLAG_TE_ACTIVE; > + sc->sc_flags &= ~IWM_FLAG_HW_ERR; > > sc->sc_newstate(ic, IEEE80211_S_INIT, -1); > > @@ -7169,7 +7170,6 @@ int > iwm_intr(void *arg) > { > struct iwm_softc *sc = arg; > - struct ifnet *ifp = IC2IFP(&sc->sc_ic); > int handled = 0; > int r1, r2, rv = 0; > int isperiodic = 0; > @@ -7218,6 +7218,15 @@ iwm_intr(void *arg) > /* ignored */ > handled |= (r1 & (IWM_CSR_INT_BIT_ALIVE /*| IWM_CSR_INT_BIT_SCD*/)); > > + if (r1 & IWM_CSR_INT_BIT_RF_KILL) { > + handled |= IWM_CSR_INT_BIT_RF_KILL; > + if (iwm_check_rfkill(sc)) { > + task_add(systq, &sc->init_task); > + rv = 1; > + goto out; > + } > + } > + > if (r1 & IWM_CSR_INT_BIT_SW_ERR) { > #ifdef IWM_DEBUG > int i; > @@ -7238,7 +7247,6 @@ iwm_intr(void *arg) > #endif > > printf("%s: fatal firmware error\n", DEVNAME(sc)); > - iwm_stop(ifp); > task_add(systq, &sc->init_task); > rv = 1; > goto out; > @@ -7248,7 +7256,8 @@ iwm_intr(void *arg) > if (r1 & IWM_CSR_INT_BIT_HW_ERR) { > handled |= IWM_CSR_INT_BIT_HW_ERR; > printf("%s: hardware error, stopping device \n", DEVNAME(sc)); > - iwm_stop(ifp); > + sc->sc_flags |= IWM_FLAG_HW_ERR; > + task_add(systq, &sc->init_task); > rv = 1; > goto out; > } > @@ -7262,13 +7271,6 @@ iwm_intr(void *arg) > wakeup(&sc->sc_fw); > } > > - if (r1 & IWM_CSR_INT_BIT_RF_KILL) { > - handled |= IWM_CSR_INT_BIT_RF_KILL; > - if (iwm_check_rfkill(sc) && (ifp->if_flags & IFF_UP)) { > - iwm_stop(ifp); > - } > - } > - > if (r1 & IWM_CSR_INT_BIT_RX_PERIODIC) { > handled |= IWM_CSR_INT_BIT_RX_PERIODIC; > IWM_WRITE(sc, IWM_CSR_INT, IWM_CSR_INT_BIT_RX_PERIODIC); > @@ -7739,23 +7741,27 @@ iwm_init_task(void *arg1) > { > struct iwm_softc *sc = arg1; > struct ifnet *ifp = &sc->sc_ic.ic_if; > - int s; > + int s = splnet(); > int generation = sc->sc_generation; > + int fatal = (sc->sc_flags & (IWM_FLAG_HW_ERR | IWM_FLAG_RFKILL)); (I presume that sleeping in rw_enter_write() drops splnet.) > rw_enter_write(&sc->ioctl_rwl); > if (generation != sc->sc_generation) { > rw_exit(&sc->ioctl_rwl); > + splx(s); > return; > } > - s = splnet(); > > if (ifp->if_flags & IFF_RUNNING) > iwm_stop(ifp); > - if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == IFF_UP) > + else if (sc->sc_flags & IWM_FLAG_HW_ERR) > + sc->sc_flags &= ~IWM_FLAG_HW_ERR; iwm_stop() clears IWM_FLAG_HW_ERR unconditionally, too, so at this point IWM_FLAG_HW_ERR is clear. Suggest the simpler and equivalent if (ifp->if_flags & IFF_RUNNING) iwm_stop(ifp); + sc->sc_flags &= ~IWM_FLAG_HW_ERR; - if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == IFF_UP) > + > + if (!fatal && (ifp->if_flags & (IFF_UP | IFF_RUNNING)) == IFF_UP) > iwm_init(ifp); > > - splx(s); > rw_exit(&sc->ioctl_rwl); > + splx(s); > } > > int > @@ -7778,9 +7784,12 @@ iwm_activate(struct device *self, int ac > int err = 0; > > switch (act) { > - case DVACT_SUSPEND: > - if (ifp->if_flags & IFF_RUNNING) > + case DVACT_QUIESCE: > + if (ifp->if_flags & IFF_RUNNING) { > + rw_enter_write(&sc->ioctl_rwl); > iwm_stop(ifp); > + rw_exit(&sc->ioctl_rwl); > + } > break; > case DVACT_RESUME: > err = iwm_resume(sc); > Index: if_iwmvar.h > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_iwmvar.h,v > retrieving revision 1.33 > diff -u -p -r1.33 if_iwmvar.h > --- if_iwmvar.h 13 Aug 2017 15:34:54 -0000 1.33 > +++ if_iwmvar.h 14 Aug 2017 06:00:16 -0000 > @@ -286,6 +286,7 @@ struct iwm_rx_ring { > #define IWM_FLAG_BINDING_ACTIVE 0x10 /* MAC->PHY binding added to > firmware */ > #define IWM_FLAG_STA_ACTIVE 0x20 /* AP added to firmware station table */ > #define IWM_FLAG_TE_ACTIVE 0x40 /* time event is scheduled */ > +#define IWM_FLAG_HW_ERR 0x80 /* hardware error occurred */ > > struct iwm_ucode_status { > uint32_t uc_error_event_table; >