On Wed, Nov 17, 2021 at 07:53:44PM +0100, Stefan Sperling wrote:
> On Wed, Nov 17, 2021 at 03:25:36PM +0000, Mikolaj Kucharski wrote:
> > On Mon, Nov 15, 2021 at 12:33:09PM +0000, Mikolaj Kucharski wrote:
> > > I have more panics, with couple of minutes gap between
> > > ieee80211_setkeysdone() from ieee80211_proto.c and panic
> > > in ieee80211_encrypt() from ieee80211_crypto.c.
> > > 
> > > Sorry for wrapped lines, but I added Time::HiRes in a rush,
> > > the console grabbing script.
> > > 
> > 
> > Improved console grabbing script, and also added address where the key
> > is stored via printf()'s %p. This is full boot up with more debugging.
> > I've added printf()'s all but one for memset()'s in net80211/. Not sure
> > is full diff interesting here.
> > 
> > What is strange, I would imagine, that address of they key is one
> > between those two:
> 
> I still cannot make conclusive sense of this.

More off-list investigation was successful.
We can explain the problem as follows:

The frame which triggers this panic seems to be a unicast frame
which was moved from ni_savedq to ic_pwrsaveq during DTIM (see
function ieee80211_notify_dtim() in ieee80211_node.c).

The if_start() routine of athn(4) pulls frames off ic_pwrsaveq and sends
them out. When a node disappears we clear its ni_savedq, but any frames
for this node queued on ic_pwrsaveq are not removed! The next time
athn's if_start runs a frame buffered on ic_pwrsaveq will be sent,
with the node already in COLLECT state and its WPA key having been
cleared. If the "swcrypto key check" doesn't trigger we may end up
with a use-after-free crash instead (the node pointer is stored in
the mbuf's packet header and becomes invalid once the node is freed).

To fix this we can teach the net80211 stack to remove frames from
ic_pwrsaveq when a client node has decided to leave the AP.
Patch below, which also fixes missing node refcount adjustments when
frames get purged.  ok?

Mikolaj has been running this for a while, and has confirmed that the
DTIM queue removal case in ieee80211_node_leave_pwrsave() does trigger.

diff 114b285775411c80e68d9031cf2b80c83c75d07f /usr/src
blob - 116d71f94d678686994b0de328dcf0735bac024d
file + sys/net80211/ieee80211_node.c
--- sys/net80211/ieee80211_node.c
+++ sys/net80211/ieee80211_node.c
@@ -87,6 +87,8 @@ void ieee80211_node_join_11g(struct ieee80211com *, st
 void ieee80211_node_leave_ht(struct ieee80211com *, struct ieee80211_node *);
 void ieee80211_node_leave_rsn(struct ieee80211com *, struct ieee80211_node *);
 void ieee80211_node_leave_11g(struct ieee80211com *, struct ieee80211_node *);
+void ieee80211_node_leave_pwrsave(struct ieee80211com *,
+    struct ieee80211_node *);
 void ieee80211_inact_timeout(void *);
 void ieee80211_node_cache_timeout(void *);
 #endif
@@ -2790,6 +2792,39 @@ ieee80211_node_leave_11g(struct ieee80211com *ic, stru
        }
 }
 
+void
+ieee80211_node_leave_pwrsave(struct ieee80211com *ic,
+    struct ieee80211_node *ni)
+{
+       struct mbuf_queue keep = MBUF_QUEUE_INITIALIZER(IFQ_MAXLEN, IPL_NET);
+       struct mbuf *m;
+
+       if (ni->ni_pwrsave == IEEE80211_PS_DOZE)
+               ni->ni_pwrsave = IEEE80211_PS_AWAKE;
+
+       if (mq_len(&ni->ni_savedq) > 0) {
+               if (ic->ic_set_tim != NULL)
+                       (*ic->ic_set_tim)(ic, ni->ni_associd, 0);
+       }
+       while ((m = mq_dequeue(&ni->ni_savedq)) != NULL) {
+               if (ni->ni_refcnt > 0)
+                       ieee80211_node_decref(ni);
+               m_freem(m);
+       }
+
+       /* Purge frames queued for transmission during DTIM. */
+       while ((m = mq_dequeue(&ic->ic_pwrsaveq)) != NULL) {
+               if (m->m_pkthdr.ph_cookie == ni) {
+                       if (ni->ni_refcnt > 0)
+                               ieee80211_node_decref(ni);
+                       m_freem(m);
+               } else
+                       mq_enqueue(&keep, m);
+       }
+       while ((m = mq_dequeue(&keep)) != NULL)
+               mq_enqueue(&ic->ic_pwrsaveq, m);
+}
+
 /*
  * Handle bookkeeping for station deauthentication/disassociation
  * when operating as an ap.
@@ -2811,14 +2846,8 @@ ieee80211_node_leave(struct ieee80211com *ic, struct i
                return;
        }
 
-       if (ni->ni_pwrsave == IEEE80211_PS_DOZE)
-               ni->ni_pwrsave = IEEE80211_PS_AWAKE;
+       ieee80211_node_leave_pwrsave(ic, ni);
 
-       if (mq_purge(&ni->ni_savedq) > 0) {
-               if (ic->ic_set_tim != NULL)
-                       (*ic->ic_set_tim)(ic, ni->ni_associd, 0);
-       }
-
        if (ic->ic_flags & IEEE80211_F_RSNON)
                ieee80211_node_leave_rsn(ic, ni);
 

Reply via email to