iwm(4) has been using a firmware-based approach to retrying failed
Tx attempts with successively lower data rates. This is suboptimal
for our kernel's Tx rate selection algorithms, which are called
MiRA (for 11n) and AMRR (for 11a/b/g).

I have observed a mismatch between the Tx rate reported by ifconfig iwm0
and the Rx rate reported at the receiving end, and this diff fixes
that issue for me.

Because iwm firmware retries on lower rates, our own rate selection
is fooled into believing that high rates succeed when they in fact
don't succeed. This causes more Tx retries than necessary, which
results in lower throughput than we could achieve otherwise.

With the diff below we instead ask the firmware to always keep retrying
at a constant Tx rate. This provides more accurate feedback to MiRa and
AMRR, which are then able to select an initial Tx rate which is more
likely to succeed.

However, there is another reason why we were using firmware-based retries.
In 11n mode, net80211 only knows about rates which are based on OFDM.
On 2GHz channels, the older CCK modulation provides a larger range and can
thus make the difference between a bad network connection and no network
connection at all. We were relying on the firmware to retry with CCK if
OFDM frames failed so I had to implement a similar strategy in the driver.
This is achived by falling back to CCK rates and running AMRR instead of
MiRA if the lowest OFDM rate is failing, until AMRR decides to start using
OFDM again, and then switch back to OFDM + MiRA. Getting the driver to switch
back to OFDM once it is using CCK wasn't straightforward. I eventually found
out that feeding only actual Tx failures to AMRR makes this work well.

I had to remove 11n support from AMRR which interfered with this fallback.
This is overdue anyway since all drivers are using MiRa in 11n mode now.

When testing this, please compare throughput before and after this diff.
I run tcpbench -s on a host behind the AP and start a tcpbench client on
my iwm laptop. With this diff, and an 11ac AP on channel 149, tcpbench
measures up to 26 Mpbs in my environment, with an average rate above 20 Mbps.

Also pay attention to changes in latency. To test this, on my iwm laptop
I run ping and rain(6) through an SSH connection to a machine behind the AP.

Tested on a 7265 device only so far.

iwn(4) has the same problem and should also be fixed once this change
has made it into the tree.

diff 3cfc9cae129c023be655a6d31902cddd094fdec4 /usr/src
blob - 45e4446d79d2da0f847170e34b9a01e25b71ea62
file + sys/dev/pci/if_iwm.c
--- sys/dev/pci/if_iwm.c
+++ sys/dev/pci/if_iwm.c
@@ -217,6 +217,7 @@ const struct iwm_rate {
 #define IWM_RIDX_MAX   (nitems(iwm_rates)-1)
 #define IWM_RIDX_IS_CCK(_i_) ((_i_) < IWM_RIDX_OFDM)
 #define IWM_RIDX_IS_OFDM(_i_) ((_i_) >= IWM_RIDX_OFDM)
+#define IWM_RVAL_IS_OFDM(_i_) ((_i_) >= 12 && (_i_) != 22)
 
 /* Convert an MCS index into an iwm_rates[] index. */
 const int iwm_mcs2ridx[] = {
@@ -368,6 +369,7 @@ void        iwm_rx_rx_phy_cmd(struct iwm_softc *, struct 
iwm_
 int    iwm_get_noise(const struct iwm_statistics_rx_non_phy *);
 void   iwm_rx_rx_mpdu(struct iwm_softc *, struct iwm_rx_packet *,
            struct iwm_rx_data *);
+void   iwm_enable_ht_cck_fallback(struct iwm_softc *, struct iwm_node *);
 void   iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_rx_packet *,
            struct iwm_node *);
 void   iwm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *,
@@ -447,7 +449,6 @@ int iwm_run(struct iwm_softc *);
 int    iwm_run_stop(struct iwm_softc *);
 struct ieee80211_node *iwm_node_alloc(struct ieee80211com *);
 void   iwm_calib_timeout(void *);
-void   iwm_setrates(struct iwm_node *);
 int    iwm_media_change(struct ifnet *);
 void   iwm_newstate_task(void *);
 int    iwm_newstate(struct ieee80211com *, enum ieee80211_state, int);
@@ -3574,6 +3575,37 @@ iwm_rx_rx_mpdu(struct iwm_softc *sc, struct iwm_rx_pac
 }
 
 void
+iwm_enable_ht_cck_fallback(struct iwm_softc *sc, struct iwm_node *in)
+{
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ieee80211_node *ni = &in->in_ni;
+       struct ieee80211_rateset *rs = &ni->ni_rates;
+       uint8_t rval = (rs->rs_rates[ni->ni_txrate] & IEEE80211_RATE_VAL);
+       uint8_t min_rval = ieee80211_min_basic_rate(ic);
+       int i;
+
+       /* Are CCK frames forbidden in our BSS? */
+       if (IWM_RVAL_IS_OFDM(min_rval))
+               return;
+
+       in->ht_force_cck = 1;
+
+       ieee80211_mira_cancel_timeouts(&in->in_mn);
+       ieee80211_mira_node_init(&in->in_mn);
+       ieee80211_amrr_node_init(&sc->sc_amrr, &in->in_amn);
+
+       /* Choose initial CCK Tx rate. */
+       ni->ni_txrate = 0;
+       for (i = 0; i < rs->rs_nrates; i++) {
+               rval = (rs->rs_rates[i] & IEEE80211_RATE_VAL);
+               if (rval == min_rval) {
+                       ni->ni_txrate = i;
+                       break;
+               }
+       }
+}
+
+void
 iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_rx_packet *pkt,
     struct iwm_node *in)
 {
@@ -3590,12 +3622,18 @@ iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_
            status != IWM_TX_STATUS_DIRECT_DONE);
 
        /* Update rate control statistics. */
-       if ((ni->ni_flags & IEEE80211_NODE_HT) == 0) {
+       if ((ni->ni_flags & IEEE80211_NODE_HT) == 0 || in->ht_force_cck) {
                in->in_amn.amn_txcnt++;
-               if (tx_resp->failure_frame > 0)
+               if (in->ht_force_cck) {
+                       /*
+                        * We want to move back to OFDM quickly if possible.
+                        * Only show actual Tx failures to AMRR, not retries.
+                        */
+                       if (txfail)
+                               in->in_amn.amn_retrycnt++;
+               } else if (tx_resp->failure_frame > 0)
                        in->in_amn.amn_retrycnt++;
        } else if (ic->ic_fixed_mcs == -1) {
-               int omcs = ni->ni_txmcs;
                in->in_mn.frames += tx_resp->frame_count;
                in->in_mn.ampdu_size = le16toh(tx_resp->byte_cnt);
                in->in_mn.agglen = tx_resp->frame_count;
@@ -3603,14 +3641,16 @@ iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_
                        in->in_mn.retries += tx_resp->failure_frame;
                if (txfail)
                        in->in_mn.txfail += tx_resp->frame_count;
-               if (ic->ic_state == IEEE80211_S_RUN)
+               if (ic->ic_state == IEEE80211_S_RUN && !in->ht_force_cck) {
+                       int otxmcs = ni->ni_txmcs;
+
                        ieee80211_mira_choose(&in->in_mn, ic, &in->in_ni);
-               /* 
-                * If MiRA has chosen a new TX rate we must update
-                * the firwmare's LQ rate table from process context.
-                */
-               if (omcs != ni->ni_txmcs)
-                       iwm_add_task(sc, systq, &sc->setrates_task);
+
+                       /* Fall back to CCK rates if MCS 0 is failing. */
+                       if (txfail && IEEE80211_IS_CHAN_2GHZ(ni->ni_chan) &&
+                           otxmcs == 0 && ni->ni_txmcs == 0)
+                               iwm_enable_ht_cck_fallback(sc, in);
+               }
        }
 
        if (txfail)
@@ -4112,41 +4152,32 @@ iwm_tx_fill_cmd(struct iwm_softc *sc, struct iwm_node 
 {
        struct ieee80211com *ic = &sc->sc_ic;
        struct ieee80211_node *ni = &in->in_ni;
+       struct ieee80211_rateset *rs = &ni->ni_rates;
        const struct iwm_rate *rinfo;
        int type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
-       int ridx, rate_flags, i;
-       int nrates = ni->ni_rates.rs_nrates;
+       int min_ridx = iwm_rval2ridx(ieee80211_min_basic_rate(ic));
+       int ridx, rate_flags;
 
        tx->rts_retry_limit = IWM_RTS_DFAULT_RETRY_LIMIT;
-       tx->data_retry_limit = IWM_DEFAULT_TX_RETRY;
+       tx->data_retry_limit = IWM_LOW_RETRY_LIMIT;
 
        if (IEEE80211_IS_MULTICAST(wh->i_addr1) ||
            type != IEEE80211_FC0_TYPE_DATA) {
                /* for non-data, use the lowest supported rate */
-               ridx = iwm_rval2ridx(ieee80211_min_basic_rate(ic));
+               ridx = min_ridx;
                tx->data_retry_limit = IWM_MGMT_DFAULT_RETRY_LIMIT;
        } else if (ic->ic_fixed_mcs != -1) {
                ridx = sc->sc_fixed_ridx;
        } else if (ic->ic_fixed_rate != -1) {
                ridx = sc->sc_fixed_ridx;
+       } else if ((ni->ni_flags & IEEE80211_NODE_HT) && !in->ht_force_cck) {
+               ridx = iwm_mcs2ridx[ni->ni_txmcs];
        } else {
-               /* for data frames, use RS table */
-               tx->initial_rate_index = 0;
-               tx->tx_flags |= htole32(IWM_TX_CMD_FLG_STA_RATE);
-               if (ni->ni_flags & IEEE80211_NODE_HT) {
-                       ridx = iwm_mcs2ridx[ni->ni_txmcs];
-                       return &iwm_rates[ridx];
-               }
-               ridx = (IEEE80211_IS_CHAN_5GHZ(ni->ni_chan)) ?
-                   IWM_RIDX_OFDM : IWM_RIDX_CCK;
-               for (i = 0; i < nrates; i++) {
-                       if (iwm_rates[i].rate == (ni->ni_txrate &
-                           IEEE80211_RATE_VAL)) {
-                               ridx = i;
-                               break;
-                       }
-               }
-               return &iwm_rates[ridx];
+               uint8_t rval;
+               rval = (rs->rs_rates[ni->ni_txrate] & IEEE80211_RATE_VAL);
+               ridx = iwm_rval2ridx(rval);
+               if (ridx < min_ridx)
+                       ridx = min_ridx;
        }
 
        rinfo = &iwm_rates[ridx];
@@ -4228,7 +4259,7 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie
                if ((ni->ni_flags & IEEE80211_NODE_HT) &&
                    !IEEE80211_IS_MULTICAST(wh->i_addr1) &&
                    type == IEEE80211_FC0_TYPE_DATA &&
-                   rinfo->plcp == IWM_RATE_INVM_PLCP) {
+                   rinfo->ht_plcp != IWM_RATE_HT_SISO_MCS_INV_PLCP) {
                        tap->wt_rate = (0x80 | rinfo->ht_plcp);
                } else
                        tap->wt_rate = rinfo->rate;
@@ -5172,6 +5203,8 @@ iwm_rval2ridx(int rval)
        int ridx;
 
        for (ridx = 0; ridx < nitems(iwm_rates); ridx++) {
+               if (iwm_rates[ridx].plcp == IWM_RATE_INVM_PLCP)
+                       continue;
                if (rval == iwm_rates[ridx].rate)
                        break;
        }
@@ -5837,7 +5870,6 @@ iwm_run(struct iwm_softc *sc)
        /* Start at lowest available bit-rate, AMRR will raise. */
        in->in_ni.ni_txrate = 0;
        in->in_ni.ni_txmcs = 0;
-       iwm_setrates(in);
 
        timeout_add_msec(&sc->sc_calib_to, 500);
        iwm_led_enable(sc);
@@ -5900,159 +5932,27 @@ iwm_calib_timeout(void *arg)
        struct ieee80211com *ic = &sc->sc_ic;
        struct iwm_node *in = (void *)ic->ic_bss;
        struct ieee80211_node *ni = &in->in_ni;
-       int s, otxrate;
+       int s;
 
        s = splnet();
        if ((ic->ic_fixed_rate == -1 || ic->ic_fixed_mcs == -1) &&
-           ((ni->ni_flags & IEEE80211_NODE_HT) == 0) &&
+           ((ni->ni_flags & IEEE80211_NODE_HT) == 0 || in->ht_force_cck) &&
            ic->ic_opmode == IEEE80211_M_STA && ic->ic_bss) {
-               otxrate = ni->ni_txrate;
                ieee80211_amrr_choose(&sc->sc_amrr, &in->in_ni, &in->in_amn);
-               /* 
-                * If AMRR has chosen a new TX rate we must update
-                * the firwmare's LQ rate table from process context.
-                */
-               if (otxrate != ni->ni_txrate)
-                       iwm_add_task(sc, systq, &sc->setrates_task);
+               if (in->ht_force_cck) {
+                       struct ieee80211_rateset *rs = &ni->ni_rates;
+                       uint8_t rv;
+                       rv = (rs->rs_rates[ni->ni_txrate] & IEEE80211_RATE_VAL);
+                       if (IWM_RVAL_IS_OFDM(rv))
+                               in->ht_force_cck = 0;
+               }
        }
+
        splx(s);
 
        timeout_add_msec(&sc->sc_calib_to, 500);
 }
 
-void
-iwm_setrates_task(void *arg)
-{
-       struct iwm_softc *sc = arg;
-       struct ieee80211com *ic = &sc->sc_ic;
-       struct iwm_node *in = (struct iwm_node *)ic->ic_bss;
-       int s = splnet();
-
-       if (sc->sc_flags & IWM_FLAG_SHUTDOWN) {
-               refcnt_rele_wake(&sc->task_refs);
-               splx(s);
-               return;
-       }
-
-       /* Update rates table based on new TX rate determined by AMRR. */
-       iwm_setrates(in);
-       refcnt_rele_wake(&sc->task_refs);
-       splx(s);
-}
-
-void
-iwm_setrates(struct iwm_node *in)
-{
-       struct ieee80211_node *ni = &in->in_ni;
-       struct ieee80211com *ic = ni->ni_ic;
-       struct iwm_softc *sc = IC2IFP(ic)->if_softc;
-       struct iwm_lq_cmd *lq = &in->in_lq;
-       struct ieee80211_rateset *rs = &ni->ni_rates;
-       int i, ridx, ridx_min, ridx_max, j, sgi_ok = 0, mimo, tab = 0;
-       struct iwm_host_cmd cmd = {
-               .id = IWM_LQ_CMD,
-               .len = { sizeof(in->in_lq), },
-       };
-
-       memset(lq, 0, sizeof(*lq));
-       lq->sta_id = IWM_STATION_ID;
-
-       if (ic->ic_flags & IEEE80211_F_USEPROT)
-               lq->flags |= IWM_LQ_FLAG_USE_RTS_MSK;
-
-       if ((ni->ni_flags & IEEE80211_NODE_HT) &&
-           ieee80211_node_supports_ht_sgi20(ni)) {
-               ni->ni_flags |= IEEE80211_NODE_HT_SGI20;
-               sgi_ok = 1;
-       }
-
-       /*
-        * Fill the LQ rate selection table with legacy and/or HT rates
-        * in descending order, i.e. with the node's current TX rate first.
-        * In cases where throughput of an HT rate corresponds to a legacy
-        * rate it makes no sense to add both. We rely on the fact that
-        * iwm_rates is laid out such that equivalent HT/legacy rates share
-        * the same IWM_RATE_*_INDEX value. Also, rates not applicable to
-        * legacy/HT are assumed to be marked with an 'invalid' PLCP value.
-        */
-       j = 0;
-       ridx_min = iwm_rval2ridx(ieee80211_min_basic_rate(ic));
-       mimo = iwm_is_mimo_mcs(ni->ni_txmcs);
-       ridx_max = (mimo ? IWM_RIDX_MAX : IWM_LAST_HT_SISO_RATE);
-       for (ridx = ridx_max; ridx >= ridx_min; ridx--) {
-               uint8_t plcp = iwm_rates[ridx].plcp;
-               uint8_t ht_plcp = iwm_rates[ridx].ht_plcp;
-
-               if (j >= nitems(lq->rs_table))
-                       break;
-               tab = 0;
-               if (ni->ni_flags & IEEE80211_NODE_HT) {
-                       if (ht_plcp == IWM_RATE_HT_SISO_MCS_INV_PLCP)
-                               continue;
-                       /* Do not mix SISO and MIMO HT rates. */
-                       if ((mimo && !iwm_is_mimo_ht_plcp(ht_plcp)) ||
-                           (!mimo && iwm_is_mimo_ht_plcp(ht_plcp)))
-                               continue;
-                       for (i = ni->ni_txmcs; i >= 0; i--) {
-                               if (isclr(ni->ni_rxmcs, i))
-                                       continue;
-                               if (ridx == iwm_mcs2ridx[i]) {
-                                       tab = ht_plcp;
-                                       tab |= IWM_RATE_MCS_HT_MSK;
-                                       if (sgi_ok)
-                                               tab |= IWM_RATE_MCS_SGI_MSK;
-                                       break;
-                               }
-                       }
-               } else if (plcp != IWM_RATE_INVM_PLCP) {
-                       for (i = ni->ni_txrate; i >= 0; i--) {
-                               if (iwm_rates[ridx].rate == (rs->rs_rates[i] &
-                                   IEEE80211_RATE_VAL)) {
-                                       tab = plcp;
-                                       break;
-                               }
-                       }
-               }
-
-               if (tab == 0)
-                       continue;
-
-               if (iwm_is_mimo_ht_plcp(ht_plcp))
-                       tab |= IWM_RATE_MCS_ANT_AB_MSK;
-               else
-                       tab |= IWM_RATE_MCS_ANT_A_MSK;
-
-               if (IWM_RIDX_IS_CCK(ridx))
-                       tab |= IWM_RATE_MCS_CCK_MSK;
-               lq->rs_table[j++] = htole32(tab);
-       }
-
-       lq->mimo_delim = (mimo ? j : 0);
-
-       /* Fill the rest with the lowest possible rate */
-       while (j < nitems(lq->rs_table)) {
-               tab = iwm_rates[ridx_min].plcp;
-               if (IWM_RIDX_IS_CCK(ridx_min))
-                       tab |= IWM_RATE_MCS_CCK_MSK;
-               tab |= IWM_RATE_MCS_ANT_A_MSK;
-               lq->rs_table[j++] = htole32(tab);
-       }
-
-       lq->single_stream_ant_msk = IWM_ANT_A;
-       lq->dual_stream_ant_msk = IWM_ANT_AB;
-
-       lq->agg_time_limit = htole16(4000);     /* 4ms */
-       lq->agg_disable_start_th = 3;
-#ifdef notyet
-       lq->agg_frame_cnt_limit = 0x3f;
-#else
-       lq->agg_frame_cnt_limit = 1; /* tx agg disabled */
-#endif
-
-       cmd.data[0] = &in->in_lq;
-       iwm_send_cmd(sc, &cmd);
-}
-
 int
 iwm_media_change(struct ifnet *ifp)
 {
@@ -6196,7 +6096,6 @@ iwm_newstate(struct ieee80211com *ic, enum ieee80211_s
        if (ic->ic_state == IEEE80211_S_RUN) {
                timeout_del(&sc->sc_calib_to);
                ieee80211_mira_cancel_timeouts(&in->in_mn);
-               iwm_del_task(sc, systq, &sc->setrates_task);
                iwm_del_task(sc, systq, &sc->ba_task);
                iwm_del_task(sc, systq, &sc->htprot_task);
        }
@@ -6705,7 +6604,6 @@ iwm_stop(struct ifnet *ifp)
        /* Cancel scheduled tasks and let any stale tasks finish up. */
        task_del(systq, &sc->init_task);
        iwm_del_task(sc, sc->sc_nswq, &sc->newstate_task);
-       iwm_del_task(sc, systq, &sc->setrates_task);
        iwm_del_task(sc, systq, &sc->ba_task);
        iwm_del_task(sc, systq, &sc->htprot_task);
        KASSERT(sc->task_refs.refs >= 1);
@@ -7895,7 +7793,6 @@ iwm_attach(struct device *parent, struct device *self,
        timeout_set(&sc->sc_led_blink_to, iwm_led_blink_timeout, sc);
        task_set(&sc->init_task, iwm_init_task, sc);
        task_set(&sc->newstate_task, iwm_newstate_task, sc);
-       task_set(&sc->setrates_task, iwm_setrates_task, sc);
        task_set(&sc->ba_task, iwm_ba_task, sc);
        task_set(&sc->htprot_task, iwm_htprot_task, sc);
 
blob - 7e35820c7ceb997683c3c0c0e1da1112290c38cd
file + sys/dev/pci/if_iwmvar.h
--- sys/dev/pci/if_iwmvar.h
+++ sys/dev/pci/if_iwmvar.h
@@ -509,9 +509,12 @@ struct iwm_node {
        uint16_t in_id;
        uint16_t in_color;
 
-       struct iwm_lq_cmd in_lq;
        struct ieee80211_amrr_node in_amn;
        struct ieee80211_mira_node in_mn;
+
+       /* Set in 11n mode if we don't receive ACKs for OFDM frames. */
+       int ht_force_cck;
+
 };
 #define IWM_STATION_ID 0
 #define IWM_AUX_STA_ID 1
blob - 2daf18ca6aed42bfded73d204690736ca6a4ae5e
file + sys/net80211/ieee80211_amrr.c
--- sys/net80211/ieee80211_amrr.c
+++ sys/net80211/ieee80211_amrr.c
@@ -41,45 +41,28 @@
 #define reset_cnt(amn)         \
        do { (amn)->amn_txcnt = (amn)->amn_retrycnt = 0; } while (0)
 
-/* 
- * XXX In HT mode we only support MCS 0-7, for now.
- * Beyond MCS 7, incrementing the MCS index does not imply a
- * higher data rate, so this simple implementation will need
- * to be enhanced.
- */
-
 static inline int
 is_min_rate(struct ieee80211_node *ni)
 {
-       if (ni->ni_flags & IEEE80211_NODE_HT)
-               return (ni->ni_txmcs == 0);
        return (ni->ni_txrate == 0);
 }
 
 static inline int
 is_max_rate(struct ieee80211_node *ni)
 {
-       if (ni->ni_flags & IEEE80211_NODE_HT)
-               return (ni->ni_txmcs == 7); /* XXX up to MCS 7 only */
        return (ni->ni_txrate == ni->ni_rates.rs_nrates - 1);
 }
 
 static inline void
 increase_rate(struct ieee80211_node *ni)
 {
-       if (ni->ni_flags & IEEE80211_NODE_HT)
-               ni->ni_txmcs++;
-       else
-               ni->ni_txrate++;
+       ni->ni_txrate++;
 }
 
 static inline void
 decrease_rate(struct ieee80211_node *ni)
 {
-       if (ni->ni_flags & IEEE80211_NODE_HT)
-               ni->ni_txmcs--;
-       else
-               ni->ni_txrate--;
+       ni->ni_txrate--;
 }
 
 void
@@ -110,7 +93,6 @@ ieee80211_amrr_choose(struct ieee80211_amrr *amrr, str
                        amn->amn_success = 0;
                        increase_rate(ni);
                        DPRINTF(("increase rate=%d,#tx=%d,#retries=%d\n",
-                           (ni->ni_flags & IEEE80211_NODE_HT) ? ni->ni_txmcs :
                            RV(ni->ni_rates.rs_rates[ni->ni_txrate]),
                            amn->amn_txcnt, amn->amn_retrycnt));
                        need_change = 1;
@@ -132,7 +114,6 @@ ieee80211_amrr_choose(struct ieee80211_amrr *amrr, str
                        }
                        decrease_rate(ni);
                        DPRINTF(("decrease rate=%d,#tx=%d,#retries=%d\n",
-                           (ni->ni_flags & IEEE80211_NODE_HT) ? ni->ni_txmcs :
                            RV(ni->ni_rates.rs_rates[ni->ni_txrate]),
                            amn->amn_txcnt, amn->amn_retrycnt));
                        need_change = 1;




Reply via email to