There are buggy APs which emit sequence numbers like 1889, 2501, 1890,
1891, 1892, ... A jump like this causes the blockack code to move the
expected sequence number window forward to 2501 and drop all incoming
frames between 1889 and 2501. Eventually the numbers wrap and traffic
starts flowing again until the next fluke frame comes in. 

The following diff adds a heuristic which detects this problem.
It cannot be perfect since it's a heuristic. I've tried guessing
reasonable values to guide it. Does this look reasonable?

Tested by myself and krw@ who owns one such problematic AP and was
suffering the consequences while wifi just worked for everyone else...

Index: ieee80211_input.c
===================================================================
RCS file: /cvs/src/sys/net80211/ieee80211_input.c,v
retrieving revision 1.158
diff -u -p -r1.158 ieee80211_input.c
--- ieee80211_input.c   5 Feb 2016 19:42:04 -0000       1.158
+++ ieee80211_input.c   7 Feb 2016 23:31:08 -0000
@@ -712,6 +712,37 @@ ieee80211_input_ba(struct ieee80211com *
                return;
        }
        if (SEQ_LT(ba->ba_winend, sn)) {        /* WinEndB < SN */
+               /* 
+                * If this frame would move the window outside the range of
+                * winend + winsize, drop it. This is likely a fluke and the
+                * next frame will fit into the window again. Allowing the
+                * window to be moved too far ahead makes us drop frames
+                * until their sequence numbers catch up with the new window.
+                *
+                * However, if the window really did move arbitrarily, we must
+                * allow it to move forward. We try to detect this condition
+                * by counting missed consecutive frames.
+                *
+                * Works around buggy behaviour observed with Broadcom-based
+                * APs, which emit "sequence" numbers such as 1888, 1889, 2501,
+                * 1890, 1891, ... all for the same TID.
+                */
+               if (((sn - ba->ba_winend) & 0xfff) > IEEE80211_BA_MAX_WINSZ) {
+                       if (ba->ba_winmiss < IEEE80211_BA_MAX_WINMISS) { 
+                               if (ba->ba_missedsn == sn - 1)
+                                       ba->ba_winmiss++;
+                               else
+                                       ba->ba_winmiss = 0;
+                               ba->ba_missedsn = sn;
+                               ifp->if_ierrors++;
+                               m_freem(m);     /* discard the MPDU */
+                               return;
+                       }
+
+                       /* It appears the window has moved for real. */
+                       ba->ba_winmiss = 0;
+                       ba->ba_missedsn = 0;
+               }
                count = (sn - ba->ba_winend) & 0xfff;
                if (count > ba->ba_winsize)     /* no overlap */
                        count = ba->ba_winsize;
Index: ieee80211_node.h
===================================================================
RCS file: /cvs/src/sys/net80211/ieee80211_node.h,v
retrieving revision 1.56
diff -u -p -r1.56 ieee80211_node.h
--- ieee80211_node.h    5 Feb 2016 16:07:57 -0000       1.56
+++ ieee80211_node.h    7 Feb 2016 23:29:47 -0000
@@ -150,6 +150,12 @@ struct ieee80211_rx_ba {
        u_int16_t               ba_head;
        struct timeout          ba_gap_to;
 #define IEEE80211_BA_GAP_TIMEOUT       500 /* msec */
+       /* Counter for consecutive frames which missed the BA window. */
+       int                     ba_winmiss;
+       /* Sequence number of previous frame which missed the BA window. */
+       uint16_t                ba_missedsn;
+       /* Window moves forward after this many frames have missed it. */
+#define IEEE80211_BA_MAX_WINMISS       8
 };
 
 /*

Reply via email to