On Fri, Jul 17, 2020 at 03:59:38PM +0200, Stefan Sperling wrote:
> While measuring Tx performance at a fixed Tx rate with iwm(4) I observed
> unexpected dips in throughput measured by tcpbench. These dips coincided
> with one or more gap timeouts shown in 'netstat -W iwm0', such as:
>       77 input block ack window gaps timed out
> Which means lost frames on the receive side were stalling subsequent frames
> and thus slowing tcpbench down.
> 
> I decided to disable the gap timeout entirely to see what would happen if
> those missing frames were immediately skipped rather than waiting for them.
> The result was stable throughput according to tcpbench.
> 
> I then wrote the patch below which keeps the gap timeout intact (it is needed
> in case the peer stops sending anything) but skips missing frames at the head
> of the Rx block window once a certain amount of frames have queued up. This
> heuristics avoids having to wait for the timeout to fire in order to get
> frames flowing again if we lose one of more frames during Rx traffic bursts.
> 
> I have picked a threshold of 16 outstanding frames based on local testing.
> I have no idea if this is a good threshold for everyone. It would help to
> get some feedback from tests in other RF environments and other types of 
> access points. Any regressions?

Next version.

One problem with the previous patch was that it effectively limited the
size of the BA window to the arbitrarily chosen limit of 16. We should not
drop frames which arrive out of order but still fall within the BA window.

With this version, we allow the entire block ack window (usually 64 frames)
to fill up beyond the missing frame at the head, and only then bypass the
gap timeout handler and skip over the missing frame directly. I can still
trigger this shortcut with tcpbench, and still see the timeout run sometimes.
Direct skip should be faster than having to wait for the timeout to run,
and missing just one out of 64 frames is a common case in my testing.

Also, I am not quite sure if calling if_input() from a timeout is such a
good idea. Any opinions about that? This patch still lets the gap timeout
handler clear the leading gap but avoids flushing buffered frames there.
The peer will now need to send another frame to flush the buffer, but now
if_input() will be called from network interrupt context only. Which is
probably a good thing?

This code still seems to recover well enough from occasional packet loss,
which is what this is all about. If you are on a really bad link, none
of this will help anyway.

diff refs/heads/master refs/heads/ba-gap
blob - 098aa9bce19481ce09676ce3c4fc0040f14c9b93
blob + 4f41b568311bf29e131a3f4802e0a238ba940fe0
--- sys/net80211/ieee80211_input.c
+++ sys/net80211/ieee80211_input.c
@@ -67,6 +67,7 @@ void  ieee80211_input_ba(struct ieee80211com *, struct 
            struct mbuf_list *);
 void   ieee80211_input_ba_flush(struct ieee80211com *, struct ieee80211_node *,
            struct ieee80211_rx_ba *, struct mbuf_list *);
+int    ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *);
 void   ieee80211_input_ba_gap_timeout(void *arg);
 void   ieee80211_ba_move_window(struct ieee80211com *,
            struct ieee80211_node *, u_int8_t, u_int16_t, struct mbuf_list *);
@@ -837,10 +838,29 @@ ieee80211_input_ba(struct ieee80211com *ic, struct mbu
        rxi->rxi_flags |= IEEE80211_RXI_AMPDU_DONE;
        ba->ba_buf[idx].rxi = *rxi;
 
-       if (ba->ba_buf[ba->ba_head].m == NULL)
-               timeout_add_msec(&ba->ba_gap_to, IEEE80211_BA_GAP_TIMEOUT);
-       else if (timeout_pending(&ba->ba_gap_to))
-               timeout_del(&ba->ba_gap_to);
+       if (ba->ba_buf[ba->ba_head].m == NULL) {
+               if (ba->ba_gapwait < (ba->ba_winsize - 1)) {
+                       if (ba->ba_gapwait == 0) {
+                               timeout_add_msec(&ba->ba_gap_to,
+                                   IEEE80211_BA_GAP_TIMEOUT);
+                       }
+                       ba->ba_gapwait++;
+               } else {
+                       /*
+                        * A full BA window worth of frames is now waiting.
+                        * Skip the missing frame at the head of the window.
+                        */
+                       int skipped = ieee80211_input_ba_gap_skip(ba);
+                       ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
+                       ba->ba_gapwait = 0;
+                       if (timeout_pending(&ba->ba_gap_to))
+                               timeout_del(&ba->ba_gap_to);
+               }
+       } else {
+               ba->ba_gapwait = 0;
+               if (timeout_pending(&ba->ba_gap_to))
+                       timeout_del(&ba->ba_gap_to);
+       }
 
        ieee80211_input_ba_flush(ic, ni, ba, ml);
 }
@@ -908,10 +928,26 @@ ieee80211_input_ba_flush(struct ieee80211com *ic, stru
  * A leading gap will occur if a particular A-MPDU subframe never arrives
  * or if a bug in the sender causes sequence numbers to jump forward by > 1.
  */
+int
+ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *ba)
+{
+       int skipped = 0;
+
+       while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
+               /* move window forward */
+               ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
+               ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
+               skipped++;
+       }
+       if (skipped > 0)
+               ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
+
+       return skipped;
+}
+
 void
 ieee80211_input_ba_gap_timeout(void *arg)
 {
-       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
        struct ieee80211_rx_ba *ba = arg;
        struct ieee80211_node *ni = ba->ba_ni;
        struct ieee80211com *ic = ni->ni_ic;
@@ -921,20 +957,9 @@ ieee80211_input_ba_gap_timeout(void *arg)
 
        s = splnet();
 
-       skipped = 0;
-       while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
-               /* move window forward */
-               ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
-               ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
-               skipped++;
-               ic->ic_stats.is_ht_rx_ba_frame_lost++;
-       }
-       if (skipped > 0)
-               ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
+       skipped = ieee80211_input_ba_gap_skip(ba);
+       ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
 
-       ieee80211_input_ba_flush(ic, ni, ba, &ml);
-       if_input(&ic->ic_if, &ml);
-
        splx(s);        
 }
 
@@ -2716,6 +2741,7 @@ ieee80211_recv_addba_req(struct ieee80211com *ic, stru
        ba->ba_token = token;
        timeout_set(&ba->ba_to, ieee80211_rx_ba_timeout, ba);
        timeout_set(&ba->ba_gap_to, ieee80211_input_ba_gap_timeout, ba);
+       ba->ba_gapwait = 0;
        ba->ba_winsize = bufsz;
        if (ba->ba_winsize == 0 || ba->ba_winsize > IEEE80211_BA_MAX_WINSZ)
                ba->ba_winsize = IEEE80211_BA_MAX_WINSZ;
@@ -2956,6 +2982,7 @@ ieee80211_recv_delba(struct ieee80211com *ic, struct m
                /* stop Block Ack inactivity timer */
                timeout_del(&ba->ba_to);
                timeout_del(&ba->ba_gap_to);
+               ba->ba_gapwait = 0;
 
                if (ba->ba_buf != NULL) {
                        /* free all MSDUs stored in reordering buffer */
blob - 4256a8add05c825d9cd25404822b1e147d597325
blob + 17d982850c16aab271f362a660025a2c0331a499
--- sys/net80211/ieee80211_node.h
+++ sys/net80211/ieee80211_node.h
@@ -226,6 +226,13 @@ struct ieee80211_rx_ba {
        u_int16_t               ba_head;
        struct timeout          ba_gap_to;
 #define IEEE80211_BA_GAP_TIMEOUT       300 /* msec */
+
+       /*
+        * Counter for frames forced to wait in the reordering buffer
+        * due to a leading gap caused by one or more missing frames.
+        */
+       int                     ba_gapwait;
+
        /* Counter for consecutive frames which missed the BA window. */
        int                     ba_winmiss;
        /* Sequence number of previous frame which missed the BA window. */

Reply via email to