fix iwm(4) newstate task (was: Re: iwm(4): make iwm_newstate() interrupt safe)

2015-07-20 Thread Stefan Sperling
On Sun, Jul 19, 2015 at 04:32:39AM +0200, Stefan Sperling wrote:
 Please test this if you use iwm(4). It should make the driver more
 reliable, e.g. when bringing the interface up which sometimes fails
 because of... reasons.

Please test this updated diff instead. The previous one had races between
the ioctl handler and the newstate function, pointed out by kettenis@.
I believe claudio@ has already hit them. I got help from mpi@, too.

Index: if_iwm.c
===
RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
retrieving revision 1.45
diff -u -p -r1.45 if_iwm.c
--- if_iwm.c15 Jun 2015 08:06:11 -  1.45
+++ if_iwm.c20 Jul 2015 22:33:16 -
@@ -195,14 +195,6 @@ const struct iwm_rate {
 #define IWM_RIDX_IS_CCK(_i_) ((_i_)  IWM_RIDX_OFDM)
 #define IWM_RIDX_IS_OFDM(_i_) ((_i_) = IWM_RIDX_OFDM)
 
-struct iwm_newstate_state {
-   struct task ns_wk;
-   struct ieee80211com *ns_ic;
-   enum ieee80211_state ns_nstate;
-   int ns_arg;
-   int ns_generation;
-};
-
 intiwm_store_cscheme(struct iwm_softc *, uint8_t *, size_t);
 intiwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type,
uint8_t *, size_t);
@@ -406,13 +398,13 @@ struct ieee80211_node *iwm_node_alloc(st
 void   iwm_calib_timeout(void *);
 void   iwm_setrates(struct iwm_node *);
 intiwm_media_change(struct ifnet *);
-void   iwm_newstate_cb(void *);
+void   iwm_newstate_task(void *);
 intiwm_newstate(struct ieee80211com *, enum ieee80211_state, int);
 void   iwm_endscan_cb(void *);
 intiwm_init_hw(struct iwm_softc *);
 intiwm_init(struct ifnet *);
 void   iwm_start(struct ifnet *);
-void   iwm_stop(struct ifnet *, int);
+void   iwm_stop(struct ifnet *);
 void   iwm_watchdog(struct ifnet *);
 intiwm_ioctl(struct ifnet *, u_long, iwm_caddr_t);
 const char *iwm_desc_lookup(uint32_t);
@@ -425,9 +417,9 @@ int iwm_match(struct device *, void *, v
 intiwm_preinit(struct iwm_softc *);
 void   iwm_attach_hook(iwm_hookarg_t);
 void   iwm_attach(struct device *, struct device *, void *);
-void   iwm_init_task(void *);
 intiwm_activate(struct device *, int);
-void   iwm_wakeup(struct iwm_softc *);
+void   iwm_suspend(struct iwm_softc *);
+void   iwm_resume(struct iwm_softc *);
 
 #if NBPFILTER  0
 void   iwm_radiotap_attach(struct iwm_softc *);
@@ -5250,40 +5242,27 @@ iwm_media_change(struct ifnet *ifp)
sc-sc_fixed_ridx = ridx;
}
 
-   if ((ifp-if_flags  (IFF_UP | IFF_RUNNING)) ==
-   (IFF_UP | IFF_RUNNING)) {
-   iwm_stop(ifp, 0);
-   error = iwm_init(ifp);
-   }
-   return error;
+   /* 
+* No need to do anything on the hardware side.
+* Channel changes will be applied during the
+* association sequence in iwm_auth().
+*/
+
+   return (0);
 }
 
 void
-iwm_newstate_cb(void *wk)
+iwm_newstate_task(void *arg)
 {
-   struct iwm_newstate_state *iwmns = (void *)wk;
-   struct ieee80211com *ic = iwmns-ns_ic;
-   enum ieee80211_state nstate = iwmns-ns_nstate;
-   int generation = iwmns-ns_generation;
+   struct iwm_softc *sc = arg;
+   struct ieee80211com *ic = sc-sc_ic;
+   struct ifnet *ifp = IC2IFP(sc-sc_ic);
+   struct iwm_newstate_task_arg *task_arg = sc-sc_newstate_task_arg;
+   enum ieee80211_state nstate = task_arg-state;
struct iwm_node *in;
-   int arg = iwmns-ns_arg;
-   struct ifnet *ifp = IC2IFP(ic);
-   struct iwm_softc *sc = ifp-if_softc;
int error;
 
-   free(iwmns, M_DEVBUF, sizeof(*iwmns));
-
-   DPRINTF((Prepare to switch state %d-%d\n, ic-ic_state, nstate));
-   if (sc-sc_generation != generation) {
-   DPRINTF((newstate_cb: someone pulled the plug meanwhile\n));
-   if (nstate == IEEE80211_S_INIT) {
-   DPRINTF((newstate_cb: nstate == IEEE80211_S_INIT: 
calling sc_newstate()\n));
-   sc-sc_newstate(ic, nstate, arg);
-   }
-   return;
-   }
-
-   DPRINTF((switching state %d-%d\n, ic-ic_state, nstate));
+   sc-sc_newstate_errno = 0;
 
if (ic-ic_state == IEEE80211_S_SCAN  nstate != ic-ic_state)
iwm_led_blink_stop(sc);
@@ -5292,6 +5271,8 @@ iwm_newstate_cb(void *wk)
if (ic-ic_state == IEEE80211_S_RUN  nstate != ic-ic_state) {
iwm_mvm_disable_beacon_filter(sc, (void *)ic-ic_bss);
 
+   timeout_del(sc-sc_calib_to);
+
if (((in = (void *)ic-ic_bss) != NULL))
in-in_assoc = 0;
iwm_release(sc, NULL);
@@ -5310,8 +5291,9 @@ iwm_newstate_cb(void *wk)
if (nstate == IEEE80211_S_SCAN ||
nstate == IEEE80211_S_AUTH ||
nstate == IEEE80211_S_ASSOC) {
-   DPRINTF((Force transition to INIT; MGT=%d\n, arg));
-   

Re: iwm(4): make iwm_newstate() interrupt safe

2015-07-18 Thread Stefan Sperling
On Thu, Jun 18, 2015 at 04:23:43PM +0200, Stefan Sperling wrote:
 The net80211 stack assumes drivers will switch IEEE80211_S_* states in
 interrupt context. iwm(4) does not follow this rule. Since it insists on 
 responses from firmware commands to look for success or failure and it
 uses tsleep() to wait for responses it cannot switch state in interrupt
 context. So currently, the entire state machine is deferred to process
 context (big hammer solution) :-/
 
 Complications arise in the suspend/resume path because of this, such as
 http://marc.info/?l=openbsd-techm=143438073018743w=2 apart from several
 other such issues where a failure on part of the firmware to respond will
 deadlock the driver in an endless tsleep.
 
 I would very much like iwm_newstate() to be interrupt safe and get rid of
 the pesky newstate_cb task which wraps it. It makes debugging and following
 the control flow difficult. And I hope the driver will be more stable overall.
 
 There are two ways to approach this:
 
  - Simply don't care about answers from firmware when in interrupt
(note that this is what iwn(4) does)
 
  - Busy-wait for replies from the firmware when in interrupt

Here's a diff implementing a third approach, discussed with mpi@.

 - Keep the newstate transitions in a task thread, but only ever
   schedule one 80211 state transition at a time.

Requires a tweak for suspend/resume, which wants to run two state
transitions at resume time if the interface was up during suspend
(back to INIT, then INIT - SCAN).

Please test this if you use iwm(4). It should make the driver more
reliable, e.g. when bringing the interface up which sometimes fails
because of... reasons.

Index: if_iwm.c
===
RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
retrieving revision 1.45
diff -u -p -r1.45 if_iwm.c
--- if_iwm.c15 Jun 2015 08:06:11 -  1.45
+++ if_iwm.c19 Jul 2015 02:13:13 -
@@ -195,14 +195,6 @@ const struct iwm_rate {
 #define IWM_RIDX_IS_CCK(_i_) ((_i_)  IWM_RIDX_OFDM)
 #define IWM_RIDX_IS_OFDM(_i_) ((_i_) = IWM_RIDX_OFDM)
 
-struct iwm_newstate_state {
-   struct task ns_wk;
-   struct ieee80211com *ns_ic;
-   enum ieee80211_state ns_nstate;
-   int ns_arg;
-   int ns_generation;
-};
-
 intiwm_store_cscheme(struct iwm_softc *, uint8_t *, size_t);
 intiwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type,
uint8_t *, size_t);
@@ -406,13 +398,13 @@ struct ieee80211_node *iwm_node_alloc(st
 void   iwm_calib_timeout(void *);
 void   iwm_setrates(struct iwm_node *);
 intiwm_media_change(struct ifnet *);
-void   iwm_newstate_cb(void *);
+void   iwm_newstate_task(void *);
 intiwm_newstate(struct ieee80211com *, enum ieee80211_state, int);
 void   iwm_endscan_cb(void *);
 intiwm_init_hw(struct iwm_softc *);
 intiwm_init(struct ifnet *);
 void   iwm_start(struct ifnet *);
-void   iwm_stop(struct ifnet *, int);
+void   iwm_stop(struct ifnet *);
 void   iwm_watchdog(struct ifnet *);
 intiwm_ioctl(struct ifnet *, u_long, iwm_caddr_t);
 const char *iwm_desc_lookup(uint32_t);
@@ -427,7 +419,8 @@ voidiwm_attach_hook(iwm_hookarg_t);
 void   iwm_attach(struct device *, struct device *, void *);
 void   iwm_init_task(void *);
 intiwm_activate(struct device *, int);
-void   iwm_wakeup(struct iwm_softc *);
+void   iwm_suspend(struct iwm_softc *);
+void   iwm_resume(struct iwm_softc *);
 
 #if NBPFILTER  0
 void   iwm_radiotap_attach(struct iwm_softc *);
@@ -5252,38 +5245,25 @@ iwm_media_change(struct ifnet *ifp)
 
if ((ifp-if_flags  (IFF_UP | IFF_RUNNING)) ==
(IFF_UP | IFF_RUNNING)) {
-   iwm_stop(ifp, 0);
+   iwm_stop(ifp);
error = iwm_init(ifp);
}
return error;
 }
 
 void
-iwm_newstate_cb(void *wk)
+iwm_newstate_task(void *arg)
 {
-   struct iwm_newstate_state *iwmns = (void *)wk;
-   struct ieee80211com *ic = iwmns-ns_ic;
-   enum ieee80211_state nstate = iwmns-ns_nstate;
-   int generation = iwmns-ns_generation;
+   struct iwm_softc *sc = arg;
+   struct ieee80211com *ic = sc-sc_ic;
+   struct iwm_newstate_task_arg *task_arg = sc-sc_newstate_task_arg;
+   enum ieee80211_state nstate = task_arg-state;
struct iwm_node *in;
-   int arg = iwmns-ns_arg;
-   struct ifnet *ifp = IC2IFP(ic);
-   struct iwm_softc *sc = ifp-if_softc;
int error;
 
-   free(iwmns, M_DEVBUF, sizeof(*iwmns));
-
-   DPRINTF((Prepare to switch state %d-%d\n, ic-ic_state, nstate));
-   if (sc-sc_generation != generation) {
-   DPRINTF((newstate_cb: someone pulled the plug meanwhile\n));
-   if (nstate == IEEE80211_S_INIT) {
-   DPRINTF((newstate_cb: nstate == IEEE80211_S_INIT: 
calling sc_newstate()\n));
-   sc-sc_newstate(ic, nstate, arg);
-   }
-   

iwm(4): make iwm_newstate() interrupt safe

2015-06-18 Thread Stefan Sperling
The net80211 stack assumes drivers will switch IEEE80211_S_* states in
interrupt context. iwm(4) does not follow this rule. Since it insists on 
responses from firmware commands to look for success or failure and it
uses tsleep() to wait for responses it cannot switch state in interrupt
context. So currently, the entire state machine is deferred to process
context (big hammer solution) :-/

Complications arise in the suspend/resume path because of this, such as
http://marc.info/?l=openbsd-techm=143438073018743w=2 apart from several
other such issues where a failure on part of the firmware to respond will
deadlock the driver in an endless tsleep.

I would very much like iwm_newstate() to be interrupt safe and get rid of
the pesky newstate_cb task which wraps it. It makes debugging and following
the control flow difficult. And I hope the driver will be more stable overall.

There are two ways to approach this:

 - Simply don't care about answers from firmware when in interrupt
   (note that this is what iwn(4) does)

 - Busy-wait for replies from the firmware when in interrupt

The diff below is an attempt at the first approach simply because it can
be written with less code (no busy waits). And if we ignore an error response
and the firmware freaks out, well, we'll eventually handle the problem
by resetting the hardware. There's nothing much else we could do if a
command fails to complete after busy-wait anyway.

I would very much like other people to test this change to ensure
I'm not breaking some other use case I can't cover. And perhaps there's
a condition where a busy-wait is necessary to ensure proper operation?

I've tested suspend/resume, and switching associations between a couple
of different APs, and interface down/up on the AP side. I'm not seeing
any regressions apart from the fact that the firmware will crash if a
scan is run while the interface is up -- bringing the interface back up
is possible so this is not entirely horrible.
Note that we must get rid of iwm_release() or we break suspend/resume and
de-auth from AP, because iwm_release() calls hw_init() which will tsleep()
for loading firmware. This is probably related to why scans don't work
while we're up. However, the way the driver currently handles RUN-SCAN
is pretty broken as well since it merely provides an illusion of it working
and doesn't actually know how to handle this transition in firmware/hardware.

Index: if_iwm.c
===
RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
retrieving revision 1.45
diff -u -p -r1.45 if_iwm.c
--- if_iwm.c15 Jun 2015 08:06:11 -  1.45
+++ if_iwm.c18 Jun 2015 13:29:46 -
@@ -195,14 +195,6 @@ const struct iwm_rate {
 #define IWM_RIDX_IS_CCK(_i_) ((_i_)  IWM_RIDX_OFDM)
 #define IWM_RIDX_IS_OFDM(_i_) ((_i_) = IWM_RIDX_OFDM)
 
-struct iwm_newstate_state {
-   struct task ns_wk;
-   struct ieee80211com *ns_ic;
-   enum ieee80211_state ns_nstate;
-   int ns_arg;
-   int ns_generation;
-};
-
 intiwm_store_cscheme(struct iwm_softc *, uint8_t *, size_t);
 intiwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type,
uint8_t *, size_t);
@@ -338,9 +330,9 @@ int iwm_send_cmd(struct iwm_softc *, str
 intiwm_mvm_send_cmd_pdu(struct iwm_softc *, uint8_t, uint32_t, uint16_t,
const void *);
 intiwm_mvm_send_cmd_status(struct iwm_softc *, struct iwm_host_cmd *,
-   uint32_t *);
+   uint32_t *, int);
 intiwm_mvm_send_cmd_pdu_status(struct iwm_softc *, uint8_t,
-   uint16_t, const void *, uint32_t *);
+   uint16_t, const void *, uint32_t *, 
int);
 void   iwm_free_resp(struct iwm_softc *, struct iwm_host_cmd *);
 void   iwm_cmd_done(struct iwm_softc *, struct iwm_rx_packet *);
 void   iwm_update_sched(struct iwm_softc *, int, int, uint8_t, uint16_t);
@@ -401,12 +393,10 @@ int   iwm_mvm_mac_ctxt_changed(struct iwm_
 intiwm_mvm_update_quotas(struct iwm_softc *, struct iwm_node *);
 intiwm_auth(struct iwm_softc *);
 intiwm_assoc(struct iwm_softc *);
-intiwm_release(struct iwm_softc *, struct iwm_node *);
 struct ieee80211_node *iwm_node_alloc(struct ieee80211com *);
 void   iwm_calib_timeout(void *);
 void   iwm_setrates(struct iwm_node *);
 intiwm_media_change(struct ifnet *);
-void   iwm_newstate_cb(void *);
 intiwm_newstate(struct ieee80211com *, enum ieee80211_state, int);
 void   iwm_endscan_cb(void *);
 intiwm_init_hw(struct iwm_softc *);
@@ -2188,10 +2178,10 @@ iwm_mvm_send_time_event_cmd(struct iwm_s
 
if (sc-sc_capaflags  IWM_UCODE_TLV_FLAGS_TIME_EVENT_API_V2)
return iwm_mvm_send_cmd_pdu(sc, IWM_TIME_EVENT_CMD,
-   IWM_CMD_SYNC, sizeof(*cmd), cmd);
+   IWM_CMD_ASYNC, sizeof(*cmd), cmd);