fix iwm(4) newstate task (was: Re: iwm(4): make iwm_newstate() interrupt safe)
On Sun, Jul 19, 2015 at 04:32:39AM +0200, Stefan Sperling wrote: Please test this if you use iwm(4). It should make the driver more reliable, e.g. when bringing the interface up which sometimes fails because of... reasons. Please test this updated diff instead. The previous one had races between the ioctl handler and the newstate function, pointed out by kettenis@. I believe claudio@ has already hit them. I got help from mpi@, too. Index: if_iwm.c === RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v retrieving revision 1.45 diff -u -p -r1.45 if_iwm.c --- if_iwm.c15 Jun 2015 08:06:11 - 1.45 +++ if_iwm.c20 Jul 2015 22:33:16 - @@ -195,14 +195,6 @@ const struct iwm_rate { #define IWM_RIDX_IS_CCK(_i_) ((_i_) IWM_RIDX_OFDM) #define IWM_RIDX_IS_OFDM(_i_) ((_i_) = IWM_RIDX_OFDM) -struct iwm_newstate_state { - struct task ns_wk; - struct ieee80211com *ns_ic; - enum ieee80211_state ns_nstate; - int ns_arg; - int ns_generation; -}; - intiwm_store_cscheme(struct iwm_softc *, uint8_t *, size_t); intiwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type, uint8_t *, size_t); @@ -406,13 +398,13 @@ struct ieee80211_node *iwm_node_alloc(st void iwm_calib_timeout(void *); void iwm_setrates(struct iwm_node *); intiwm_media_change(struct ifnet *); -void iwm_newstate_cb(void *); +void iwm_newstate_task(void *); intiwm_newstate(struct ieee80211com *, enum ieee80211_state, int); void iwm_endscan_cb(void *); intiwm_init_hw(struct iwm_softc *); intiwm_init(struct ifnet *); void iwm_start(struct ifnet *); -void iwm_stop(struct ifnet *, int); +void iwm_stop(struct ifnet *); void iwm_watchdog(struct ifnet *); intiwm_ioctl(struct ifnet *, u_long, iwm_caddr_t); const char *iwm_desc_lookup(uint32_t); @@ -425,9 +417,9 @@ int iwm_match(struct device *, void *, v intiwm_preinit(struct iwm_softc *); void iwm_attach_hook(iwm_hookarg_t); void iwm_attach(struct device *, struct device *, void *); -void iwm_init_task(void *); intiwm_activate(struct device *, int); -void iwm_wakeup(struct iwm_softc *); +void iwm_suspend(struct iwm_softc *); +void iwm_resume(struct iwm_softc *); #if NBPFILTER 0 void iwm_radiotap_attach(struct iwm_softc *); @@ -5250,40 +5242,27 @@ iwm_media_change(struct ifnet *ifp) sc-sc_fixed_ridx = ridx; } - if ((ifp-if_flags (IFF_UP | IFF_RUNNING)) == - (IFF_UP | IFF_RUNNING)) { - iwm_stop(ifp, 0); - error = iwm_init(ifp); - } - return error; + /* +* No need to do anything on the hardware side. +* Channel changes will be applied during the +* association sequence in iwm_auth(). +*/ + + return (0); } void -iwm_newstate_cb(void *wk) +iwm_newstate_task(void *arg) { - struct iwm_newstate_state *iwmns = (void *)wk; - struct ieee80211com *ic = iwmns-ns_ic; - enum ieee80211_state nstate = iwmns-ns_nstate; - int generation = iwmns-ns_generation; + struct iwm_softc *sc = arg; + struct ieee80211com *ic = sc-sc_ic; + struct ifnet *ifp = IC2IFP(sc-sc_ic); + struct iwm_newstate_task_arg *task_arg = sc-sc_newstate_task_arg; + enum ieee80211_state nstate = task_arg-state; struct iwm_node *in; - int arg = iwmns-ns_arg; - struct ifnet *ifp = IC2IFP(ic); - struct iwm_softc *sc = ifp-if_softc; int error; - free(iwmns, M_DEVBUF, sizeof(*iwmns)); - - DPRINTF((Prepare to switch state %d-%d\n, ic-ic_state, nstate)); - if (sc-sc_generation != generation) { - DPRINTF((newstate_cb: someone pulled the plug meanwhile\n)); - if (nstate == IEEE80211_S_INIT) { - DPRINTF((newstate_cb: nstate == IEEE80211_S_INIT: calling sc_newstate()\n)); - sc-sc_newstate(ic, nstate, arg); - } - return; - } - - DPRINTF((switching state %d-%d\n, ic-ic_state, nstate)); + sc-sc_newstate_errno = 0; if (ic-ic_state == IEEE80211_S_SCAN nstate != ic-ic_state) iwm_led_blink_stop(sc); @@ -5292,6 +5271,8 @@ iwm_newstate_cb(void *wk) if (ic-ic_state == IEEE80211_S_RUN nstate != ic-ic_state) { iwm_mvm_disable_beacon_filter(sc, (void *)ic-ic_bss); + timeout_del(sc-sc_calib_to); + if (((in = (void *)ic-ic_bss) != NULL)) in-in_assoc = 0; iwm_release(sc, NULL); @@ -5310,8 +5291,9 @@ iwm_newstate_cb(void *wk) if (nstate == IEEE80211_S_SCAN || nstate == IEEE80211_S_AUTH || nstate == IEEE80211_S_ASSOC) { - DPRINTF((Force transition to INIT; MGT=%d\n, arg)); -
Re: iwm(4): make iwm_newstate() interrupt safe
On Thu, Jun 18, 2015 at 04:23:43PM +0200, Stefan Sperling wrote: The net80211 stack assumes drivers will switch IEEE80211_S_* states in interrupt context. iwm(4) does not follow this rule. Since it insists on responses from firmware commands to look for success or failure and it uses tsleep() to wait for responses it cannot switch state in interrupt context. So currently, the entire state machine is deferred to process context (big hammer solution) :-/ Complications arise in the suspend/resume path because of this, such as http://marc.info/?l=openbsd-techm=143438073018743w=2 apart from several other such issues where a failure on part of the firmware to respond will deadlock the driver in an endless tsleep. I would very much like iwm_newstate() to be interrupt safe and get rid of the pesky newstate_cb task which wraps it. It makes debugging and following the control flow difficult. And I hope the driver will be more stable overall. There are two ways to approach this: - Simply don't care about answers from firmware when in interrupt (note that this is what iwn(4) does) - Busy-wait for replies from the firmware when in interrupt Here's a diff implementing a third approach, discussed with mpi@. - Keep the newstate transitions in a task thread, but only ever schedule one 80211 state transition at a time. Requires a tweak for suspend/resume, which wants to run two state transitions at resume time if the interface was up during suspend (back to INIT, then INIT - SCAN). Please test this if you use iwm(4). It should make the driver more reliable, e.g. when bringing the interface up which sometimes fails because of... reasons. Index: if_iwm.c === RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v retrieving revision 1.45 diff -u -p -r1.45 if_iwm.c --- if_iwm.c15 Jun 2015 08:06:11 - 1.45 +++ if_iwm.c19 Jul 2015 02:13:13 - @@ -195,14 +195,6 @@ const struct iwm_rate { #define IWM_RIDX_IS_CCK(_i_) ((_i_) IWM_RIDX_OFDM) #define IWM_RIDX_IS_OFDM(_i_) ((_i_) = IWM_RIDX_OFDM) -struct iwm_newstate_state { - struct task ns_wk; - struct ieee80211com *ns_ic; - enum ieee80211_state ns_nstate; - int ns_arg; - int ns_generation; -}; - intiwm_store_cscheme(struct iwm_softc *, uint8_t *, size_t); intiwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type, uint8_t *, size_t); @@ -406,13 +398,13 @@ struct ieee80211_node *iwm_node_alloc(st void iwm_calib_timeout(void *); void iwm_setrates(struct iwm_node *); intiwm_media_change(struct ifnet *); -void iwm_newstate_cb(void *); +void iwm_newstate_task(void *); intiwm_newstate(struct ieee80211com *, enum ieee80211_state, int); void iwm_endscan_cb(void *); intiwm_init_hw(struct iwm_softc *); intiwm_init(struct ifnet *); void iwm_start(struct ifnet *); -void iwm_stop(struct ifnet *, int); +void iwm_stop(struct ifnet *); void iwm_watchdog(struct ifnet *); intiwm_ioctl(struct ifnet *, u_long, iwm_caddr_t); const char *iwm_desc_lookup(uint32_t); @@ -427,7 +419,8 @@ voidiwm_attach_hook(iwm_hookarg_t); void iwm_attach(struct device *, struct device *, void *); void iwm_init_task(void *); intiwm_activate(struct device *, int); -void iwm_wakeup(struct iwm_softc *); +void iwm_suspend(struct iwm_softc *); +void iwm_resume(struct iwm_softc *); #if NBPFILTER 0 void iwm_radiotap_attach(struct iwm_softc *); @@ -5252,38 +5245,25 @@ iwm_media_change(struct ifnet *ifp) if ((ifp-if_flags (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING)) { - iwm_stop(ifp, 0); + iwm_stop(ifp); error = iwm_init(ifp); } return error; } void -iwm_newstate_cb(void *wk) +iwm_newstate_task(void *arg) { - struct iwm_newstate_state *iwmns = (void *)wk; - struct ieee80211com *ic = iwmns-ns_ic; - enum ieee80211_state nstate = iwmns-ns_nstate; - int generation = iwmns-ns_generation; + struct iwm_softc *sc = arg; + struct ieee80211com *ic = sc-sc_ic; + struct iwm_newstate_task_arg *task_arg = sc-sc_newstate_task_arg; + enum ieee80211_state nstate = task_arg-state; struct iwm_node *in; - int arg = iwmns-ns_arg; - struct ifnet *ifp = IC2IFP(ic); - struct iwm_softc *sc = ifp-if_softc; int error; - free(iwmns, M_DEVBUF, sizeof(*iwmns)); - - DPRINTF((Prepare to switch state %d-%d\n, ic-ic_state, nstate)); - if (sc-sc_generation != generation) { - DPRINTF((newstate_cb: someone pulled the plug meanwhile\n)); - if (nstate == IEEE80211_S_INIT) { - DPRINTF((newstate_cb: nstate == IEEE80211_S_INIT: calling sc_newstate()\n)); - sc-sc_newstate(ic, nstate, arg); - } -
iwm(4): make iwm_newstate() interrupt safe
The net80211 stack assumes drivers will switch IEEE80211_S_* states in interrupt context. iwm(4) does not follow this rule. Since it insists on responses from firmware commands to look for success or failure and it uses tsleep() to wait for responses it cannot switch state in interrupt context. So currently, the entire state machine is deferred to process context (big hammer solution) :-/ Complications arise in the suspend/resume path because of this, such as http://marc.info/?l=openbsd-techm=143438073018743w=2 apart from several other such issues where a failure on part of the firmware to respond will deadlock the driver in an endless tsleep. I would very much like iwm_newstate() to be interrupt safe and get rid of the pesky newstate_cb task which wraps it. It makes debugging and following the control flow difficult. And I hope the driver will be more stable overall. There are two ways to approach this: - Simply don't care about answers from firmware when in interrupt (note that this is what iwn(4) does) - Busy-wait for replies from the firmware when in interrupt The diff below is an attempt at the first approach simply because it can be written with less code (no busy waits). And if we ignore an error response and the firmware freaks out, well, we'll eventually handle the problem by resetting the hardware. There's nothing much else we could do if a command fails to complete after busy-wait anyway. I would very much like other people to test this change to ensure I'm not breaking some other use case I can't cover. And perhaps there's a condition where a busy-wait is necessary to ensure proper operation? I've tested suspend/resume, and switching associations between a couple of different APs, and interface down/up on the AP side. I'm not seeing any regressions apart from the fact that the firmware will crash if a scan is run while the interface is up -- bringing the interface back up is possible so this is not entirely horrible. Note that we must get rid of iwm_release() or we break suspend/resume and de-auth from AP, because iwm_release() calls hw_init() which will tsleep() for loading firmware. This is probably related to why scans don't work while we're up. However, the way the driver currently handles RUN-SCAN is pretty broken as well since it merely provides an illusion of it working and doesn't actually know how to handle this transition in firmware/hardware. Index: if_iwm.c === RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v retrieving revision 1.45 diff -u -p -r1.45 if_iwm.c --- if_iwm.c15 Jun 2015 08:06:11 - 1.45 +++ if_iwm.c18 Jun 2015 13:29:46 - @@ -195,14 +195,6 @@ const struct iwm_rate { #define IWM_RIDX_IS_CCK(_i_) ((_i_) IWM_RIDX_OFDM) #define IWM_RIDX_IS_OFDM(_i_) ((_i_) = IWM_RIDX_OFDM) -struct iwm_newstate_state { - struct task ns_wk; - struct ieee80211com *ns_ic; - enum ieee80211_state ns_nstate; - int ns_arg; - int ns_generation; -}; - intiwm_store_cscheme(struct iwm_softc *, uint8_t *, size_t); intiwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type, uint8_t *, size_t); @@ -338,9 +330,9 @@ int iwm_send_cmd(struct iwm_softc *, str intiwm_mvm_send_cmd_pdu(struct iwm_softc *, uint8_t, uint32_t, uint16_t, const void *); intiwm_mvm_send_cmd_status(struct iwm_softc *, struct iwm_host_cmd *, - uint32_t *); + uint32_t *, int); intiwm_mvm_send_cmd_pdu_status(struct iwm_softc *, uint8_t, - uint16_t, const void *, uint32_t *); + uint16_t, const void *, uint32_t *, int); void iwm_free_resp(struct iwm_softc *, struct iwm_host_cmd *); void iwm_cmd_done(struct iwm_softc *, struct iwm_rx_packet *); void iwm_update_sched(struct iwm_softc *, int, int, uint8_t, uint16_t); @@ -401,12 +393,10 @@ int iwm_mvm_mac_ctxt_changed(struct iwm_ intiwm_mvm_update_quotas(struct iwm_softc *, struct iwm_node *); intiwm_auth(struct iwm_softc *); intiwm_assoc(struct iwm_softc *); -intiwm_release(struct iwm_softc *, struct iwm_node *); struct ieee80211_node *iwm_node_alloc(struct ieee80211com *); void iwm_calib_timeout(void *); void iwm_setrates(struct iwm_node *); intiwm_media_change(struct ifnet *); -void iwm_newstate_cb(void *); intiwm_newstate(struct ieee80211com *, enum ieee80211_state, int); void iwm_endscan_cb(void *); intiwm_init_hw(struct iwm_softc *); @@ -2188,10 +2178,10 @@ iwm_mvm_send_time_event_cmd(struct iwm_s if (sc-sc_capaflags IWM_UCODE_TLV_FLAGS_TIME_EVENT_API_V2) return iwm_mvm_send_cmd_pdu(sc, IWM_TIME_EVENT_CMD, - IWM_CMD_SYNC, sizeof(*cmd), cmd); + IWM_CMD_ASYNC, sizeof(*cmd), cmd);