While measuring Tx performance at a fixed Tx rate with iwm(4) I observed
unexpected dips in throughput measured by tcpbench. These dips coincided
with one or more gap timeouts shown in 'netstat -W iwm0', such as:
77 input block ack window gaps timed out
This means that frames lost on the receive side were stalling subsequent
frames and thus slowing tcpbench down.
I decided to disable the gap timeout entirely to see what would happen if
those missing frames were immediately skipped rather than waiting for them.
The result was stable throughput according to tcpbench.
I then wrote the patch below, which keeps the gap timeout intact (it is needed
in case the peer stops sending anything) but skips missing frames at the head
of the Rx block ack window once a certain number of frames have queued up. This
heuristic avoids having to wait for the timeout to fire in order to get
frames flowing again if we lose one or more frames during Rx traffic bursts.
I have picked a threshold of 16 outstanding frames based on local testing.
I have no idea if this is a good threshold for everyone. It would help to
get some feedback from tests in other RF environments and other types of
access points. Any regressions?
diff e27fc20afa168944a7605737ac45330f21645404 /usr/src
blob - 098aa9bce19481ce09676ce3c4fc0040f14c9b93
file + sys/net80211/ieee80211_input.c
--- sys/net80211/ieee80211_input.c
+++ sys/net80211/ieee80211_input.c
@@ -67,6 +67,7 @@ void ieee80211_input_ba(struct ieee80211com *, struct
struct mbuf_list *);
void ieee80211_input_ba_flush(struct ieee80211com *, struct ieee80211_node *,
struct ieee80211_rx_ba *, struct mbuf_list *);
+int ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *);
void ieee80211_input_ba_gap_timeout(void *arg);
void ieee80211_ba_move_window(struct ieee80211com *,
struct ieee80211_node *, u_int8_t, u_int16_t, struct mbuf_list *);
@@ -837,10 +838,24 @@ ieee80211_input_ba(struct ieee80211com *ic, struct mbu
rxi->rxi_flags |= IEEE80211_RXI_AMPDU_DONE;
ba->ba_buf[idx].rxi = *rxi;
- if (ba->ba_buf[ba->ba_head].m == NULL)
- timeout_add_msec(&ba->ba_gap_to, IEEE80211_BA_GAP_TIMEOUT);
- else if (timeout_pending(&ba->ba_gap_to))
- timeout_del(&ba->ba_gap_to);
+ if (ba->ba_buf[ba->ba_head].m == NULL) {
+ if (ba->ba_gapwait < IEEE80211_BA_MAX_GAPWAIT) {
+ if (ba->ba_gapwait == 0)
+ timeout_add_msec(&ba->ba_gap_to,
+ IEEE80211_BA_GAP_TIMEOUT);
+ ba->ba_gapwait++;
+ } else {
+ int skipped = ieee80211_input_ba_gap_skip(ba);
+ ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
+ ba->ba_gapwait = 0;
+ if (timeout_pending(&ba->ba_gap_to))
+ timeout_del(&ba->ba_gap_to);
+ }
+ } else {
+ ba->ba_gapwait = 0;
+ if (timeout_pending(&ba->ba_gap_to))
+ timeout_del(&ba->ba_gap_to);
+ }
ieee80211_input_ba_flush(ic, ni, ba, ml);
}
@@ -902,6 +917,23 @@ ieee80211_input_ba_flush(struct ieee80211com *ic, stru
ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
}
+int
+ieee80211_input_ba_gap_skip(struct ieee80211_rx_ba *ba)
+{
+ int skipped = 0;
+
+ while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
+ /* move window forward */
+ ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
+ ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
+ skipped++;
+ }
+ if (skipped > 0)
+ ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
+
+ return skipped;
+}
+
/*
* Forcibly move the BA window forward to remove a leading gap which has
* been causing frames to linger in the reordering buffer for too long.
@@ -921,17 +953,8 @@ ieee80211_input_ba_gap_timeout(void *arg)
s = splnet();
- skipped = 0;
- while (skipped < ba->ba_winsize && ba->ba_buf[ba->ba_head].m == NULL) {
- /* move window forward */
- ba->ba_head = (ba->ba_head + 1) % IEEE80211_BA_MAX_WINSZ;
- ba->ba_winstart = (ba->ba_winstart + 1) & 0xfff;
- skipped++;
- ic->ic_stats.is_ht_rx_ba_frame_lost++;
- }
- if (skipped > 0)
- ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
-
+ skipped = ieee80211_input_ba_gap_skip(ba);
+ ic->ic_stats.is_ht_rx_ba_frame_lost += skipped;
ieee80211_input_ba_flush(ic, ni, ba, &ml);
if_input(&ic->ic_if, &ml);
@@ -2716,6 +2739,7 @@ ieee80211_recv_addba_req(struct ieee80211com *ic, stru
ba->ba_token = token;
timeout_set(&ba->ba_to, ieee80211_rx_ba_timeout, ba);
timeout_set(&ba->ba_gap_to, ieee80211_input_ba_gap_timeout, ba);
+ ba->ba_gapwait = 0;
ba->ba_winsize = bufsz;
if (ba->ba_winsize == 0 || ba->ba_winsize > IEEE80211_BA_MAX_WINSZ)
ba->ba_winsize = IEEE80211_BA_MAX_WINSZ;
@@ -2956,6 +2980,7 @@ ieee80211_recv_delba(struct ieee80211com *ic, struct m
/* stop Block Ack inactivity timer */
timeout_del(&ba->ba_to);
timeout_del(&ba->ba_gap_to);
+ ba->ba_gapwait = 0;
if (ba->ba_buf != NULL) {
/* free all MSDUs stored in reordering buffer */
blob - 4256a8add05c825d9cd25404822b1e147d597325
file + sys/net80211/ieee80211_node.h
--- sys/net80211/ieee80211_node.h
+++ sys/net80211/ieee80211_node.h
@@ -226,6 +226,15 @@ struct ieee80211_rx_ba {
u_int16_t ba_head;
struct timeout ba_gap_to;
#define IEEE80211_BA_GAP_TIMEOUT 300 /* msec */
+
+ /*
+ * Counter for frames forced to wait in the reordering buffer
+ * due to a leading gap caused by one or more missing frames.
+ */
+ int ba_gapwait;
+ /* Missing frames will be skipped once this many frames are waiting. */
+#define IEEE80211_BA_MAX_GAPWAIT 16
+
/* Counter for consecutive frames which missed the BA window. */
int ba_winmiss;
/* Sequence number of previous frame which missed the BA window. */