On Mon, Jun 21, 2021 at 08:37:11PM +0200, Stefan Sperling wrote:
> This patch attempts to implement Tx aggregation support for iwx(4).
> 
> It is not yet ready to be committed because of outstanding problems:
> 
> - Under load the firmware throws a fatal firmware error every few minutes.
> 
> - Starting a background scan under load can cause firmware errors and
>   might error out when the driver attempts to flush Tx queues.
>   However, roaming seems to be generally working while traffic is light.
> 
> - Sometimes traffic seems to get stuck for no apparent reason and the driver
>   won't recover without down/up. This is independent from the rx_reorder()
>   fix which was committed today.

Following my commits to iwx from today, here is a rebased txagg patch.
The above issues are still present, unfortunately.

diff refs/heads/master refs/heads/iwx-txagg
blob - bdf8ce3e1afa332f698e3dc56af77e6acb4f8689
blob + 1dc858edd01b9f545a12fe2424b010fc4be08a56
--- sys/dev/pci/if_iwx.c
+++ sys/dev/pci/if_iwx.c
@@ -311,16 +311,14 @@ int       iwx_ampdu_rx_start(struct ieee80211com *, 
struct i
            uint8_t);
 void   iwx_ampdu_rx_stop(struct ieee80211com *, struct ieee80211_node *,
            uint8_t);
+int    iwx_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
+           uint8_t);
 void   iwx_rx_ba_session_expired(void *);
 void   iwx_reorder_timer_expired(void *);
 void   iwx_sta_rx_agg(struct iwx_softc *, struct ieee80211_node *, uint8_t,
            uint16_t, uint16_t, int, int);
-#ifdef notyet
-int    iwx_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
+void   iwx_sta_tx_agg_start(struct iwx_softc *, struct ieee80211_node *,
            uint8_t);
-void   iwx_ampdu_tx_stop(struct ieee80211com *, struct ieee80211_node *,
-           uint8_t);
-#endif
 void   iwx_ba_task(void *);
 
 int    iwx_set_mac_addr_from_csr(struct iwx_softc *, struct iwx_nvm_data *);
@@ -348,8 +346,11 @@ void       iwx_rx_frame(struct iwx_softc *, struct mbuf *, 
i
            uint32_t, struct ieee80211_rxinfo *, struct mbuf_list *);
 void   iwx_rx_tx_cmd_single(struct iwx_softc *, struct iwx_rx_packet *,
            struct iwx_node *);
+void   iwx_txd_done(struct iwx_softc *, struct iwx_tx_data *);
+void   iwx_txq_advance(struct iwx_softc *, struct iwx_tx_ring *, int);
 void   iwx_rx_tx_cmd(struct iwx_softc *, struct iwx_rx_packet *,
            struct iwx_rx_data *);
+void   iwx_clear_oactive(struct iwx_softc *, struct iwx_tx_ring *);
 void   iwx_rx_bmiss(struct iwx_softc *, struct iwx_rx_packet *,
            struct iwx_rx_data *);
 int    iwx_binding_cmd(struct iwx_softc *, struct iwx_node *, uint32_t);
@@ -369,8 +370,11 @@ void       iwx_cmd_done(struct iwx_softc *, int, int, int);
 const struct iwx_rate *iwx_tx_fill_cmd(struct iwx_softc *, struct iwx_node *,
            struct ieee80211_frame *, struct iwx_tx_cmd_gen2 *);
 void   iwx_tx_update_byte_tbl(struct iwx_tx_ring *, int, uint16_t, uint16_t);
-int    iwx_tx(struct iwx_softc *, struct mbuf *, struct ieee80211_node *, int);
-int    iwx_flush_tx_path(struct iwx_softc *);
+int    iwx_tx(struct iwx_softc *, struct mbuf *, struct ieee80211_node *);
+int    iwx_flush_sta_tids(struct iwx_softc *, int, uint16_t);
+int    iwx_wait_tx_queues_empty(struct iwx_softc *);
+int    iwx_drain_sta(struct iwx_softc *sc, struct iwx_node *, int);
+int    iwx_flush_sta(struct iwx_softc *, struct iwx_node *);
 int    iwx_beacon_filter_send_cmd(struct iwx_softc *,
            struct iwx_beacon_filter_cmd *);
 int    iwx_update_beacon_abort(struct iwx_softc *, struct iwx_node *, int);
@@ -406,7 +410,7 @@ int iwx_scan_abort(struct iwx_softc *);
 int    iwx_rs_rval2idx(uint8_t);
 uint16_t iwx_rs_ht_rates(struct iwx_softc *, struct ieee80211_node *, int);
 int    iwx_rs_init(struct iwx_softc *, struct iwx_node *);
-int    iwx_enable_data_tx_queues(struct iwx_softc *);
+int    iwx_enable_mgmt_queue(struct iwx_softc *);
 int    iwx_auth(struct iwx_softc *);
 int    iwx_deauth(struct iwx_softc *);
 int    iwx_assoc(struct iwx_softc *);
@@ -422,6 +426,7 @@ void        iwx_delete_key(struct ieee80211com *,
 int    iwx_media_change(struct ifnet *);
 void   iwx_newstate_task(void *);
 int    iwx_newstate(struct ieee80211com *, enum ieee80211_state, int);
+void   iwx_endbgscan_task(void *);
 void   iwx_endscan(struct iwx_softc *);
 void   iwx_fill_sf_command(struct iwx_softc *, struct iwx_sf_cfg_cmd *,
            struct ieee80211_node *);
@@ -1692,20 +1697,21 @@ iwx_alloc_tx_ring(struct iwx_softc *sc, struct iwx_tx_
        ring->desc = ring->desc_dma.vaddr;
 
        /*
-        * There is no need to allocate DMA buffers for unused rings.
-        * The hardware supports up to 31 Tx rings which is more
+        * The hardware supports up to 512 Tx rings which is more
         * than we currently need.
         *
-        * In DQA mode we use 1 command queue + 4 DQA mgmt/data queues.
-        * The command is queue 0 (sc->txq[0]), and 4 mgmt/data frame queues
-        * are sc->tqx[ac + IWX_DQA_AUX_QUEUE + 1], i.e. sc->txq[2:5],
-        * in order to provide one queue per EDCA category.
+        * In DQA mode we use 1 command queue + 1 default queue for
+        * management, control, and non-QoS data frames.
+        * The command is queue sc->txq[0], our default queue is
+        * sc->txq[2], and sc->txq[1] is an AUX queue used by firmware.
         *
-        * Tx aggregation will require additional queues (one queue per TID
-        * for which aggregation is enabled) but we do not implement this yet.
+        * Tx aggregation requires additional queues, one queue per TID for
+        * which aggregation is enabled. We map TID 0-7 to sc->txq[3:10].
+        * Firmware may assign its own internal IDs for these queues
+        * depending on which TID gets aggregation enabled first.
+        * The driver maintains a table mapping driver-side queue IDs
+        * to firmware-side queue IDs.
         */
-       if (qid > IWX_DQA_MIN_MGMT_QUEUE)
-               return 0;
 
        err = iwx_dma_contig_alloc(sc->sc_dmat, &ring->bc_tbl,
            sizeof(struct iwx_agn_scd_bc_tbl), 0);
@@ -1779,9 +1785,17 @@ iwx_reset_tx_ring(struct iwx_softc *sc, struct iwx_tx_
        bus_dmamap_sync(sc->sc_dmat, ring->desc_dma.map, 0,
            ring->desc_dma.size, BUS_DMASYNC_PREWRITE);
        sc->qfullmsk &= ~(1 << ring->qid);
+       sc->qenablemsk &= ~(1 << ring->qid);
+       for (i = 0; i < nitems(sc->aggqid); i++) {
+               if (sc->aggqid[i] == ring->qid) {
+                       sc->aggqid[i] = 0;
+                       break;
+               }
+       }
        ring->queued = 0;
        ring->cur = 0;
        ring->tail = 0;
+       ring->tid = 0;
 }
 
 void
@@ -2271,15 +2285,23 @@ iwx_start_hw(struct iwx_softc *sc)
 void
 iwx_stop_device(struct iwx_softc *sc)
 {
-       int qid;
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ieee80211_node *ni = ic->ic_bss;
+       int i;
 
        iwx_disable_interrupts(sc);
        sc->sc_flags &= ~IWX_FLAG_USE_ICT;
 
        iwx_disable_rx_dma(sc);
        iwx_reset_rx_ring(sc, &sc->rxq);
-       for (qid = 0; qid < nitems(sc->txq); qid++)
-               iwx_reset_tx_ring(sc, &sc->txq[qid]);
+       for (i = 0; i < nitems(sc->txq); i++)
+               iwx_reset_tx_ring(sc, &sc->txq[i]);
+       for (i = 0; i < IEEE80211_NUM_TID; i++) {
+               struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[i];
+               if (ba->ba_state != IEEE80211_BA_AGREED)
+                       continue;
+               ieee80211_delba_request(ic, ni, 0, 1, i);
+       }
 
        /* Make sure (redundant) we've released our request to stay awake */
        IWX_CLRBITS(sc, IWX_CSR_GP_CNTRL,
@@ -2386,6 +2408,18 @@ iwx_nic_init(struct iwx_softc *sc)
        return 0;
 }
 
+/* Map a TID to an ieee80211_edca_ac category. */
+const uint8_t iwx_tid_to_ac[IWX_MAX_TID_COUNT] = {
+       EDCA_AC_BE,
+       EDCA_AC_BK,
+       EDCA_AC_BK,
+       EDCA_AC_BE,
+       EDCA_AC_VI,
+       EDCA_AC_VI,
+       EDCA_AC_VO,
+       EDCA_AC_VO,
+};
+
 /* Map ieee80211_edca_ac categories to firmware Tx FIFO. */
 const uint8_t iwx_ac_to_tx_fifo[] = {
        IWX_GEN2_EDCA_TX_FIFO_BE,
@@ -2458,6 +2492,9 @@ iwx_enable_txq(struct iwx_softc *sc, int sta_id, int q
                err = EIO;
                goto out;
        }
+
+       sc->qenablemsk |= (1 << qid);
+       ring->tid = tid;
 out:
        iwx_free_resp(sc, &hcmd);
        return err;
@@ -2993,6 +3030,55 @@ iwx_updateedca(struct ieee80211com *ic)
 }
 
 void
+iwx_sta_tx_agg_start(struct iwx_softc *sc, struct ieee80211_node *ni,
+    uint8_t tid)
+{
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ieee80211_tx_ba *ba;
+       int err, qid;
+
+       /* Ensure we can map this TID to an aggregation queue. */
+       if (tid >= IWX_MAX_TID_COUNT)
+               return;
+
+       ba = &ni->ni_tx_ba[tid];
+       if (ba->ba_state != IEEE80211_BA_REQUESTED)
+               return;
+
+       qid = sc->aggqid[tid];
+       if (qid == 0) {
+               /* Firmware should pick the next unused Tx queue. */
+               qid = fls(sc->qenablemsk);
+       }
+
+       /*
+        * Simply enable the queue.
+        * Firmware handles Tx Ba session setup and teardown.
+        */
+       if ((sc->qenablemsk & (1 << qid)) == 0) {
+               if (!iwx_nic_lock(sc)) {
+                       ieee80211_addba_resp_refuse(ic, ni, tid,
+                           IEEE80211_STATUS_UNSPECIFIED);
+                       return;
+               }
+               err = iwx_enable_txq(sc, IWX_STATION_ID, qid, tid,
+                   IWX_TX_RING_COUNT);
+               iwx_nic_unlock(sc);
+               if (err) {
+                       printf("%s: could not enable Tx queue %d "
+                           "(error %d)\n", DEVNAME(sc), qid, err);
+                       ieee80211_addba_resp_refuse(ic, ni, tid,
+                           IEEE80211_STATUS_UNSPECIFIED);
+                       return;
+               }
+       }
+
+       ba->ba_timeout_val = 0;
+       ieee80211_addba_resp_accept(ic, ni, tid);
+       sc->aggqid[tid] = qid;
+}
+
+void
 iwx_ba_task(void *arg)
 {
        struct iwx_softc *sc = arg;
@@ -3004,16 +3090,26 @@ iwx_ba_task(void *arg)
        for (tid = 0; tid < IWX_MAX_TID_COUNT; tid++) {
                if (sc->sc_flags & IWX_FLAG_SHUTDOWN)
                        break;
-               if (sc->ba_start_tidmask & (1 << tid)) {
-                       iwx_sta_rx_agg(sc, ni, tid, sc->ba_ssn[tid],
-                           sc->ba_winsize[tid], sc->ba_timeout_val[tid], 1);
-                       sc->ba_start_tidmask &= ~(1 << tid);
-               } else if (sc->ba_stop_tidmask & (1 << tid)) {
+               if (sc->ba_rx.start_tidmask & (1 << tid)) {
+                       struct ieee80211_rx_ba *ba = &ni->ni_rx_ba[tid];
+                       iwx_sta_rx_agg(sc, ni, tid, ba->ba_winstart,
+                           ba->ba_winsize, ba->ba_timeout_val, 1);
+                       sc->ba_rx.start_tidmask &= ~(1 << tid);
+               } else if (sc->ba_rx.stop_tidmask & (1 << tid)) {
                        iwx_sta_rx_agg(sc, ni, tid, 0, 0, 0, 0);
-                       sc->ba_stop_tidmask &= ~(1 << tid);
+                       sc->ba_rx.stop_tidmask &= ~(1 << tid);
                }
        }
 
+       for (tid = 0; tid < IWX_MAX_TID_COUNT; tid++) {
+               if (sc->sc_flags & IWX_FLAG_SHUTDOWN)
+                       break;
+               if (sc->ba_tx.start_tidmask & (1 << tid)) {
+                       iwx_sta_tx_agg_start(sc, ni, tid);
+                       sc->ba_tx.start_tidmask &= ~(1 << tid);
+               }
+       }
+
        refcnt_rele_wake(&sc->task_refs);
        splx(s);
 }
@@ -3026,17 +3122,16 @@ int
 iwx_ampdu_rx_start(struct ieee80211com *ic, struct ieee80211_node *ni,
     uint8_t tid)
 {
-       struct ieee80211_rx_ba *ba = &ni->ni_rx_ba[tid];
        struct iwx_softc *sc = IC2IFP(ic)->if_softc;
 
        if (sc->sc_rx_ba_sessions >= IWX_MAX_RX_BA_SESSIONS ||
-           tid > IWX_MAX_TID_COUNT || (sc->ba_start_tidmask & (1 << tid)))
+           tid > IWX_MAX_TID_COUNT)
                return ENOSPC;
 
-       sc->ba_start_tidmask |= (1 << tid);
-       sc->ba_ssn[tid] = ba->ba_winstart;
-       sc->ba_winsize[tid] = ba->ba_winsize;
-       sc->ba_timeout_val[tid] = ba->ba_timeout_val;
+       if (sc->ba_rx.start_tidmask & (1 << tid))
+               return EBUSY;
+
+       sc->ba_rx.start_tidmask |= (1 << tid);
        iwx_add_task(sc, systq, &sc->ba_task);
 
        return EBUSY;
@@ -3052,13 +3147,42 @@ iwx_ampdu_rx_stop(struct ieee80211com *ic, struct ieee
 {
        struct iwx_softc *sc = IC2IFP(ic)->if_softc;
 
-       if (tid > IWX_MAX_TID_COUNT || sc->ba_stop_tidmask & (1 << tid))
+       if (tid > IWX_MAX_TID_COUNT || sc->ba_rx.stop_tidmask & (1 << tid))
                return;
 
-       sc->ba_stop_tidmask = (1 << tid);
+       sc->ba_rx.stop_tidmask = (1 << tid);
        iwx_add_task(sc, systq, &sc->ba_task);
 }
 
+int
+iwx_ampdu_tx_start(struct ieee80211com *ic, struct ieee80211_node *ni,
+    uint8_t tid)
+{
+       struct iwx_softc *sc = IC2IFP(ic)->if_softc;
+       struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
+
+       /* Ensure we can map this TID to an aggregation queue. */
+       if (tid >= IWX_MAX_TID_COUNT)
+               return EINVAL;
+
+       /* We only support a fixed Tx aggregation window size, for now. */
+       if (ba->ba_winsize != IWX_FRAME_LIMIT)
+               return ENOTSUP;
+
+       /* Is firmware already using an agg queue with this TID? */
+       if (sc->aggqid[tid] != 0)
+               return ENOSPC;
+
+       /* Are we already processing an ADDBA request? */
+       if (sc->ba_tx.start_tidmask & (1 << tid))
+               return EBUSY;
+
+       sc->ba_tx.start_tidmask |= (1 << tid);
+       iwx_add_task(sc, systq, &sc->ba_task);
+
+       return EBUSY;
+}
+
 /* Read the mac address from WFMP registers. */
 int
 iwx_set_mac_addr_from_csr(struct iwx_softc *sc, struct iwx_nvm_data *data)
@@ -4239,16 +4363,28 @@ iwx_txd_done(struct iwx_softc *sc, struct iwx_tx_data 
 }
 
 void
+iwx_txq_advance(struct iwx_softc *sc, struct iwx_tx_ring *ring, int idx)
+{
+       struct iwx_tx_data *txd;
+
+       while (ring->tail != idx) {
+               txd = &ring->data[ring->tail];
+               if (txd->m != NULL) {
+                       iwx_txd_done(sc, txd);
+                       iwx_tx_update_byte_tbl(ring, idx, 0, 0);
+                       ring->queued--;
+               }
+               ring->tail = (ring->tail + 1) % IWX_TX_RING_COUNT;
+       }
+}
+
+void
 iwx_rx_tx_cmd(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
     struct iwx_rx_data *data)
 {
-       struct ieee80211com *ic = &sc->sc_ic;
-       struct ifnet *ifp = IC2IFP(ic);
        struct iwx_cmd_header *cmd_hdr = &pkt->hdr;
-       int idx = cmd_hdr->idx;
        int qid = cmd_hdr->qid;
        struct iwx_tx_ring *ring = &sc->txq[qid];
-       struct iwx_tx_data *txd;
        struct iwx_tx_resp *tx_resp = (void *)pkt->data;
        uint32_t ssn;
        uint32_t len = iwx_rx_packet_len(pkt);
@@ -4258,33 +4394,37 @@ iwx_rx_tx_cmd(struct iwx_softc *sc, struct iwx_rx_pack
 
        sc->sc_tx_timer = 0;
 
-       txd = &ring->data[idx];
-       if (txd->m == NULL)
+       /* Sanity checks. */
+       if (sizeof(*tx_resp) > len)
                return;
-
-       if (sizeof(*tx_resp) + sizeof(ssn) +
+       if (qid < IWX_FIRST_AGG_TX_QUEUE && tx_resp->frame_count > 1)
+               return;
+       if (qid >= IWX_FIRST_AGG_TX_QUEUE && sizeof(*tx_resp) + sizeof(ssn) +
            tx_resp->frame_count * sizeof(tx_resp->status) > len)
                return;
 
-       iwx_rx_tx_cmd_single(sc, pkt, txd->in);
+       if (tx_resp->frame_count > 1) /* A-MPDU */
+               return;
 
        /*
-        * Even though this is not an agg queue, we must only free
-        * frames before the firmware's starting sequence number.
+        * On hardware supported by iwx(4) the SSN counter is only
+        * 8 bit and corresponds to a Tx ring index rather than a
+        * sequence number. Frames up to this index (non-inclusive)
+        * can now be freed.
         */
        memcpy(&ssn, &tx_resp->status + tx_resp->frame_count, sizeof(ssn));
-       ssn = le32toh(ssn) & 0xfff;
-       while (ring->tail != IWX_AGG_SSN_TO_TXQ_IDX(ssn)) {
-               txd = &ring->data[ring->tail];
-               if (txd->m != NULL) {
-                       iwx_txd_done(sc, txd);
-                       iwx_tx_update_byte_tbl(ring, idx, 0, 0);
-                       ring->queued--;
-               }
-               ring->tail = (ring->tail + 1) % IWX_TX_RING_COUNT;
-       }
+       ssn = le32toh(ssn) & 0xff;
+       iwx_txq_advance(sc, ring, ssn);
+       iwx_clear_oactive(sc, ring);
+}
 
-       if (--ring->queued < IWX_TX_RING_LOMARK) {
+void
+iwx_clear_oactive(struct iwx_softc *sc, struct iwx_tx_ring *ring)
+{
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ifnet *ifp = IC2IFP(ic);
+
+       if (ring->queued < IWX_TX_RING_LOMARK) {
                sc->qfullmsk &= ~(1 << ring->qid);
                if (sc->qfullmsk == 0 && ifq_is_oactive(&ifp->if_snd)) {
                        ifq_clr_oactive(&ifp->if_snd);
@@ -4299,6 +4439,64 @@ iwx_rx_tx_cmd(struct iwx_softc *sc, struct iwx_rx_pack
 }
 
 void
+iwx_rx_compressed_ba(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
+    struct iwx_rx_data *data)
+{
+       struct iwx_compressed_ba_notif *ba_res = (void *)pkt->data;
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ieee80211_node *ni;
+       struct ieee80211_tx_ba *ba;
+       struct iwx_node *in;
+       struct iwx_tx_ring *ring;
+       uint16_t i, tfd_cnt, ra_tid_cnt, idx;
+       int qid;
+
+       if (ic->ic_state != IEEE80211_S_RUN)
+               return;
+
+       if (iwx_rx_packet_payload_len(pkt) < sizeof(*ba_res))
+               return;
+
+       if (ba_res->sta_id != IWX_STATION_ID)
+               return;
+
+       ni = ic->ic_bss;
+       in = (void *)ni;
+
+       tfd_cnt = le16toh(ba_res->tfd_cnt);
+       ra_tid_cnt = le16toh(ba_res->ra_tid_cnt);
+       if (!tfd_cnt || iwx_rx_packet_payload_len(pkt) < (sizeof(*ba_res) +
+           sizeof(ba_res->ra_tid[0]) * ra_tid_cnt +
+           sizeof(ba_res->tfd[0]) * tfd_cnt))
+               return;
+
+       for (i = 0; i < tfd_cnt; i++) {
+               struct iwx_compressed_ba_tfd *ba_tfd = &ba_res->tfd[i];
+               uint8_t tid;
+
+               tid = ba_tfd->tid;
+               if (tid >= nitems(sc->aggqid))
+                       continue;
+
+               qid = sc->aggqid[tid];
+               if (qid != htole16(ba_tfd->q_num))
+                       continue;
+
+               ring = &sc->txq[qid];
+
+               ba = &ni->ni_tx_ba[tid];
+               if (ba->ba_state != IEEE80211_BA_AGREED)
+                       continue;
+
+               idx = le16toh(ba_tfd->tfd_index);
+               if (idx >= IWX_TX_RING_COUNT)
+                       continue;
+               iwx_txq_advance(sc, ring, idx);
+               iwx_clear_oactive(sc, ring);
+       }
+}
+
+void
 iwx_rx_bmiss(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
     struct iwx_rx_data *data)
 {
@@ -4799,7 +4997,7 @@ iwx_tx_update_byte_tbl(struct iwx_tx_ring *txq, int id
 }
 
 int
-iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni, int ac)
+iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
 {
        struct ieee80211com *ic = &sc->sc_ic;
        struct iwx_node *in = (void *)ni;
@@ -4815,25 +5013,36 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ie
        u_int hdrlen;
        bus_dma_segment_t *seg;
        uint16_t num_tbs;
-       uint8_t type;
-       int i, totlen, err, pad;
+       uint8_t type, subtype;
+       int i, totlen, err, pad, qid;
 
        wh = mtod(m, struct ieee80211_frame *);
-       hdrlen = ieee80211_get_hdrlen(wh);
        type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
+       subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
+       if (type == IEEE80211_FC0_TYPE_CTL)
+               hdrlen = sizeof(struct ieee80211_frame_min);
+       else
+               hdrlen = ieee80211_get_hdrlen(wh);
 
-       /*
-        * Map EDCA categories to Tx data queues.
-        *
-        * We use static data queue assignments even in DQA mode. We do not
-        * need to share Tx queues between stations because we only implement
-        * client mode; the firmware's station table contains only one entry
-        * which represents our access point.
-        *
-        * Tx aggregation will require additional queues (one queue per TID
-        * for which aggregation is enabled) but we do not implement this yet.
-        */
-       ring = &sc->txq[ac + IWX_DQA_AUX_QUEUE + 1];
+       qid = IWX_DQA_MGMT_QUEUE;
+
+       /* Put QoS frames on the data queue which maps to their TID. */
+       if (ieee80211_has_qos(wh)) {
+               struct ieee80211_tx_ba *ba;
+               uint16_t qos = ieee80211_get_qos(wh);
+               uint8_t tid = qos & IEEE80211_QOS_TID;
+
+               ba = &ni->ni_tx_ba[tid];
+               if (!IEEE80211_IS_MULTICAST(wh->i_addr1) &&
+                   type == IEEE80211_FC0_TYPE_DATA &&
+                   subtype != IEEE80211_FC0_SUBTYPE_NODATA &&
+                   sc->aggqid[tid] != 0 &&
+                   ba->ba_state == IEEE80211_BA_AGREED) {
+                       qid = sc->aggqid[tid];
+               }
+       }
+
+       ring = &sc->txq[qid];
        desc = &ring->desc[ring->cur];
        memset(desc, 0, sizeof(*desc));
        data = &ring->data[ring->cur];
@@ -4984,18 +5193,167 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ie
 }
 
 int
-iwx_flush_tx_path(struct iwx_softc *sc)
+iwx_flush_sta_tids(struct iwx_softc *sc, int sta_id, uint16_t tids)
 {
+       struct iwx_rx_packet *pkt;
+       struct iwx_tx_path_flush_cmd_rsp *resp;
        struct iwx_tx_path_flush_cmd flush_cmd = {
-               .sta_id = htole32(IWX_STATION_ID),
-               .tid_mask = htole16(0xffff),
+               .sta_id = htole32(sta_id),
+               .tid_mask = htole16(tids),
        };
+       struct iwx_host_cmd hcmd = {
+               .id = IWX_TXPATH_FLUSH,
+               .len = { sizeof(flush_cmd), },
+               .data = { &flush_cmd, },
+               .flags = IWX_CMD_WANT_RESP,
+               .resp_pkt_len = sizeof(*pkt) + sizeof(*resp),
+       };
+       int err, resp_len, i, num_flushed_queues;
+
+       err = iwx_send_cmd(sc, &hcmd);
+       if (err)
+               return err;
+
+       pkt = hcmd.resp_pkt;
+       if (!pkt || (pkt->hdr.flags & IWX_CMD_FAILED_MSK)) {
+               err = EIO;
+               goto out;
+       }
+
+       resp_len = iwx_rx_packet_payload_len(pkt);
+       /* Some firmware versions don't provide a response. */
+       if (resp_len == 0)
+               goto out;
+       else if (resp_len != sizeof(*resp)) {
+               err = EIO;
+               goto out;
+       }
+
+       resp = (void *)pkt->data;
+
+       if (le16toh(resp->sta_id) != sta_id) {
+               err = EIO;
+               goto out;
+       }
+
+       num_flushed_queues = le16toh(resp->num_flushed_queues);
+       if (num_flushed_queues > IWX_TX_FLUSH_QUEUE_RSP) {
+               err = EIO;
+               goto out;
+       }
+
+       for (i = 0; i < num_flushed_queues; i++) {
+               struct iwx_flush_queue_info *queue_info = &resp->queues[i];
+               uint16_t tid = le16toh(queue_info->tid);
+               uint16_t read_after = le16toh(queue_info->read_after_flush);
+               uint16_t qid = le16toh(queue_info->queue_num);
+               struct iwx_tx_ring *txq;
+
+               if (qid >= nitems(sc->txq))
+                       continue;
+
+               txq = &sc->txq[qid];
+               if (tid != txq->tid)
+                       continue;
+
+               iwx_txq_advance(sc, txq, read_after);
+       }
+out:
+       iwx_free_resp(sc, &hcmd);
+       return err;
+}
+
+#define IWX_FLUSH_WAIT_MS      2000
+
+int
+iwx_wait_tx_queues_empty(struct iwx_softc *sc)
+{
+       int i, err;
+
+       for (i = 0; i < nitems(sc->txq); i++) {
+               struct iwx_tx_ring *ring = &sc->txq[i];
+
+               if (i == IWX_DQA_CMD_QUEUE)
+                       continue;
+
+               while (ring->queued > 0) {
+                       err = tsleep_nsec(ring, 0, "iwxflush",
+                           MSEC_TO_NSEC(IWX_FLUSH_WAIT_MS));
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
+}
+
+int
+iwx_drain_sta(struct iwx_softc *sc, struct iwx_node* in, int drain)
+{
+       struct iwx_add_sta_cmd cmd;
        int err;
+       uint32_t status;
 
-       err = iwx_send_cmd_pdu(sc, IWX_TXPATH_FLUSH, 0,
-           sizeof(flush_cmd), &flush_cmd);
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.mac_id_n_color = htole32(IWX_FW_CMD_ID_AND_COLOR(in->in_id,
+           in->in_color));
+       cmd.sta_id = IWX_STATION_ID;
+       cmd.add_modify = IWX_STA_MODE_MODIFY;
+       cmd.station_flags = drain ? htole32(IWX_STA_FLG_DRAIN_FLOW) : 0;
+       cmd.station_flags_msk = htole32(IWX_STA_FLG_DRAIN_FLOW);
+
+       status = IWX_ADD_STA_SUCCESS;
+       err = iwx_send_cmd_pdu_status(sc, IWX_ADD_STA,
+           sizeof(cmd), &cmd, &status);
+       if (err) {
+               printf("%s: could not update sta (error %d)\n",
+                   DEVNAME(sc), err);
+               return err;
+       }
+
+       switch (status & IWX_ADD_STA_STATUS_MASK) {
+       case IWX_ADD_STA_SUCCESS:
+               break;
+       default:
+               err = EIO;
+               printf("%s: Couldn't %s draining for station\n",
+                   DEVNAME(sc), drain ? "enable" : "disable");
+               break;
+       }
+
+       return err;
+}
+
+int
+iwx_flush_sta(struct iwx_softc *sc, struct iwx_node *in)
+{
+       int err;
+
+       splassert(IPL_NET);
+
+       sc->sc_flags |= IWX_FLAG_TXFLUSH;
+
+       err = iwx_drain_sta(sc, in, 1);
        if (err)
-                printf("%s: Flushing tx queue failed: %d\n", DEVNAME(sc), err);
+               goto done;
+
+       err = iwx_flush_sta_tids(sc, IWX_STATION_ID, 0xffff);
+       if (err) {
+               printf("%s: could not flush Tx path (error %d)\n",
+                   DEVNAME(sc), err);
+               goto done;
+       }
+
+       err = iwx_wait_tx_queues_empty(sc);
+       if (err) {
+               printf("%s: Could not empty Tx queues (error %d)\n",
+                   DEVNAME(sc), err);
+               goto done;
+       }
+
+       err = iwx_drain_sta(sc, in, 0);
+done:
+       sc->sc_flags &= ~IWX_FLAG_TXFLUSH;
        return err;
 }
 
@@ -5162,15 +5520,23 @@ iwx_add_sta_cmd(struct iwx_softc *sc, struct iwx_node 
        add_sta_cmd.add_modify = update ? 1 : 0;
        add_sta_cmd.station_flags_msk
            |= htole32(IWX_STA_FLG_FAT_EN_MSK | IWX_STA_FLG_MIMO_EN_MSK);
-       add_sta_cmd.tid_disable_tx = htole16(0xffff);
-       if (update)
-               add_sta_cmd.modify_mask |= (IWX_STA_MODIFY_TID_DISABLE_TX);
 
        if (in->in_ni.ni_flags & IEEE80211_NODE_HT) {
                add_sta_cmd.station_flags_msk
                    |= htole32(IWX_STA_FLG_MAX_AGG_SIZE_MSK |
                    IWX_STA_FLG_AGG_MPDU_DENS_MSK);
 
+               if (iwx_mimo_enabled(sc)) {
+                       if (in->in_ni.ni_rxmcs[1] != 0) {
+                               add_sta_cmd.station_flags |=
+                                   htole32(IWX_STA_FLG_MIMO_EN_MIMO2);
+                       }
+                       if (in->in_ni.ni_rxmcs[2] != 0) {
+                               add_sta_cmd.station_flags |=
+                                   htole32(IWX_STA_FLG_MIMO_EN_MIMO3);
+                       }
+               }
+
                add_sta_cmd.station_flags
                    |= htole32(IWX_STA_FLG_MAX_AGG_SIZE_64K);
                switch (ic->ic_ampdu_params & IEEE80211_AMPDU_PARAM_SS) {
@@ -5216,7 +5582,6 @@ iwx_add_aux_sta(struct iwx_softc *sc)
        cmd.station_type = IWX_STA_AUX_ACTIVITY;
        cmd.mac_id_n_color =
            htole32(IWX_FW_CMD_ID_AND_COLOR(IWX_MAC_INDEX_AUX, 0));
-       cmd.tid_disable_tx = htole16(0xffff);
 
        status = IWX_ADD_STA_SUCCESS;
        err = iwx_send_cmd_pdu_status(sc, IWX_ADD_STA, sizeof(cmd), &cmd,
@@ -5282,7 +5647,8 @@ iwx_umac_scan_fill_channels(struct iwx_softc *sc,
                        chan->v1.iter_count = 1;
                        chan->v1.iter_interval = htole16(0);
                }
-               if (n_ssids != 0 && !bgscan)
+               if (n_ssids != 0 && !bgscan &&
+                   (c->ic_flags |= IEEE80211_CHAN_PASSIVE) == 0)
                        chan->flags = htole32(1 << 0); /* select SSID 0 */
                chan++;
                nchan++;
@@ -5649,7 +6015,6 @@ iwx_umac_scan(struct iwx_softc *sc, int bgscan)
                        IWX_UMAC_SCAN_GEN_FLAGS2_ALLOW_CHNL_REORDER;
        }
 
-#if 0 /* XXX Active scan causes firmware errors after association. */
        /* Check if we're doing an active directed scan. */
        if (ic->ic_des_esslen != 0) {
                if (isset(sc->sc_ucode_api, 
IWX_UCODE_TLV_API_SCAN_EXT_CHAN_VER)) {
@@ -5666,7 +6031,6 @@ iwx_umac_scan(struct iwx_softc *sc, int bgscan)
                req->general_flags |=
                    htole32(IWX_UMAC_SCAN_GEN_FLAGS_PRE_CONNECT);
        } else
-#endif
                req->general_flags |= htole32(IWX_UMAC_SCAN_GEN_FLAGS_PASSIVE);
 
        if (isset(sc->sc_enabled_capa, 
@@ -6182,23 +6546,20 @@ iwx_scan_abort(struct iwx_softc *sc)
 }
 
 int
-iwx_enable_data_tx_queues(struct iwx_softc *sc)
+iwx_enable_mgmt_queue(struct iwx_softc *sc)
 {
-       int err, ac;
+       int err;
 
-       for (ac = 0; ac < EDCA_NUM_AC; ac++) {
-               int qid = ac + IWX_DQA_AUX_QUEUE + 1;
-               /*
-                * Regular data frames use the "MGMT" TID and queue.
-                * Other TIDs and queues are reserved for frame aggregation.
-                */
-               err = iwx_enable_txq(sc, IWX_STATION_ID, qid, IWX_TID_NON_QOS,
-                   IWX_TX_RING_COUNT);
-               if (err) {
-                       printf("%s: could not enable Tx queue %d (error %d)\n",
-                           DEVNAME(sc), ac, err);
-                       return err;
-               }
+       /*
+        * Non-QoS frames use the "MGMT" TID and queue.
+        * Other TIDs and data queues are reserved for QoS data frames.
+        */
+       err = iwx_enable_txq(sc, IWX_STATION_ID, IWX_DQA_MGMT_QUEUE,
+           IWX_TID_MGMT, IWX_TX_RING_COUNT);
+       if (err) {
+               printf("%s: could not enable Tx queue %d (error %d)\n",
+                   DEVNAME(sc), IWX_DQA_MGMT_QUEUE, err);
+               return err;
        }
 
        return 0;
@@ -6269,7 +6630,7 @@ iwx_rs_init(struct iwx_softc *sc, struct iwx_node *in)
        cfg_cmd.sta_id = IWX_STATION_ID;
        cfg_cmd.max_ch_width = IWX_RATE_MCS_CHAN_WIDTH_20;
        cfg_cmd.chains = IWX_TLC_MNG_CHAIN_A_MSK | IWX_TLC_MNG_CHAIN_B_MSK;
-       cfg_cmd.max_mpdu_len = IEEE80211_MAX_LEN;
+       cfg_cmd.max_mpdu_len = 3839;
        if (ieee80211_node_supports_ht_sgi20(ni))
                cfg_cmd.sgi_ch_width_supp = (1 << IWX_TLC_MNG_CH_WIDTH_20MHZ);
 
@@ -6374,7 +6735,7 @@ iwx_auth(struct iwx_softc *sc)
                return 0;
        }
 
-       err = iwx_enable_data_tx_queues(sc);
+       err = iwx_enable_mgmt_queue(sc);
        if (err)
                goto rm_sta;
 
@@ -6416,15 +6777,16 @@ int
 iwx_deauth(struct iwx_softc *sc)
 {
        struct ieee80211com *ic = &sc->sc_ic;
-       struct iwx_node *in = (void *)ic->ic_bss;
-       int err;
+       struct ieee80211_node *ni = ic->ic_bss;
+       struct iwx_node *in = (void *)ni;
+       int err, i;
 
        splassert(IPL_NET);
 
        iwx_unprotect_session(sc, in);
 
        if (sc->sc_flags & IWX_FLAG_STA_ACTIVE) {
-               err = iwx_flush_tx_path(sc);
+               err = iwx_flush_sta(sc, in);
                if (err) {
                        printf("%s: could not flush Tx path (error %d)\n",
                            DEVNAME(sc), err);
@@ -6439,6 +6801,14 @@ iwx_deauth(struct iwx_softc *sc)
                sc->sc_flags &= ~IWX_FLAG_STA_ACTIVE;
                sc->sc_rx_ba_sessions = 0;
                in->in_flags = 0;
+               for (i = IWX_FIRST_AGG_TX_QUEUE; i < IWX_LAST_AGG_TX_QUEUE; i++)
+                       sc->qenablemsk &= ~(1 << i);
+               for (i = 0; i < IEEE80211_NUM_TID; i++) {
+                       struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[i];
+                       if (ba->ba_state != IEEE80211_BA_AGREED)
+                               continue;
+                       ieee80211_delba_request(ic, ni, 0, 1, i);
+               }
        }
 
        if (sc->sc_flags & IWX_FLAG_BINDING_ACTIVE) {
@@ -6482,7 +6852,7 @@ iwx_assoc(struct iwx_softc *sc)
        }
 
        if (!update_sta)
-               err = iwx_enable_data_tx_queues(sc);
+               err = iwx_enable_mgmt_queue(sc);
 
        return err;
 }
@@ -6491,12 +6861,19 @@ int
 iwx_disassoc(struct iwx_softc *sc)
 {
        struct ieee80211com *ic = &sc->sc_ic;
-       struct iwx_node *in = (void *)ic->ic_bss;
-       int err;
+       struct ieee80211_node *ni = ic->ic_bss;
+       struct iwx_node *in = (void *)ni;
+       int err, i;
 
        splassert(IPL_NET);
 
        if (sc->sc_flags & IWX_FLAG_STA_ACTIVE) {
+               err = iwx_flush_sta(sc, in);
+               if (err) {
+                       printf("%s: could not flush Tx path (error %d)\n",
+                           DEVNAME(sc), err);
+                       return err;
+               }
                err = iwx_rm_sta_cmd(sc, in);
                if (err) {
                        printf("%s: could not remove STA (error %d)\n",
@@ -6506,10 +6883,19 @@ iwx_disassoc(struct iwx_softc *sc)
                sc->sc_flags &= ~IWX_FLAG_STA_ACTIVE;
                in->in_flags = 0;
                sc->sc_rx_ba_sessions = 0;
-               sc->ba_start_tidmask = 0;
-               sc->ba_stop_tidmask = 0;
-               sc->ba_start_tidmask = 0;
-               sc->ba_stop_tidmask = 0;
+               sc->ba_rx.start_tidmask = 0;
+               sc->ba_rx.stop_tidmask = 0;
+               memset(sc->aggqid, 0, sizeof(sc->aggqid));
+               sc->ba_tx.start_tidmask = 0;
+               sc->ba_tx.stop_tidmask = 0;
+               for (i = IWX_FIRST_AGG_TX_QUEUE; i < IWX_LAST_AGG_TX_QUEUE; i++)
+                       sc->qenablemsk &= ~(1 << i);
+               for (i = 0; i < IEEE80211_NUM_TID; i++) {
+                       struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[i];
+                       if (ba->ba_state != IEEE80211_BA_AGREED)
+                               continue;
+                       ieee80211_delba_request(ic, ni, 0, 1, i);
+               }
        }
 
        return 0;
@@ -6960,6 +7346,7 @@ iwx_newstate(struct ieee80211com *ic, enum ieee80211_s
                memset(sc->setkey_arg, 0, sizeof(sc->setkey_arg));
                sc->setkey_cur = sc->setkey_tail = sc->setkey_nkeys = 0;
                iwx_del_task(sc, systq, &sc->mac_ctxt_task);
+               iwx_del_task(sc, systq, &sc->endbgscan_task);
                for (i = 0; i < nitems(sc->sc_rxba_data); i++) {
                        struct iwx_rxba_data *rxba = &sc->sc_rxba_data[i];
                        iwx_clear_reorder_buffer(sc, rxba);
@@ -6975,6 +7362,46 @@ iwx_newstate(struct ieee80211com *ic, enum ieee80211_s
 }
 
 void
+iwx_endbgscan_task(void *arg)
+{
+       struct iwx_softc *sc = arg;
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ieee80211_node *ni = ic->ic_bss;
+       struct iwx_node *in = (void *)ni;
+       int err, s;
+
+       s = splnet();
+
+       if ((sc->sc_flags & IWX_FLAG_SHUTDOWN) ||
+           (sc->sc_flags & (IWX_FLAG_SCANNING | IWX_FLAG_BGSCAN)) == 0 ||
+           ic->ic_state != IEEE80211_S_RUN) {
+               refcnt_rele_wake(&sc->task_refs);
+               splx(s);
+               return;
+       }
+
+       /*
+        * Drain Tx queues before a potential roaming attempt.
+        * There is no better way to ensure that we don't have
+        * any outstanding frames on Tx aggregation queues.
+        */
+       err = iwx_flush_sta(sc, in);
+       if (err) {
+               if ((sc->sc_flags & IWX_FLAG_SHUTDOWN) == 0)
+                       task_add(systq, &sc->init_task);
+               refcnt_rele_wake(&sc->task_refs);
+               splx(s);
+               return;
+       }
+
+       sc->sc_flags &= ~(IWX_FLAG_SCANNING | IWX_FLAG_BGSCAN);
+       ieee80211_end_scan(&ic->ic_if);
+
+       refcnt_rele_wake(&sc->task_refs);
+       splx(s);
+}
+
+void
 iwx_endscan(struct iwx_softc *sc)
 {
        struct ieee80211com *ic = &sc->sc_ic;
@@ -6982,6 +7409,12 @@ iwx_endscan(struct iwx_softc *sc)
        if ((sc->sc_flags & (IWX_FLAG_SCANNING | IWX_FLAG_BGSCAN)) == 0)
                return;
 
+       if (ic->ic_state == IEEE80211_S_RUN &&
+           (sc->sc_flags & IWX_FLAG_BGSCAN)) {
+               iwx_add_task(sc, systq, &sc->endbgscan_task);
+               return;
+       }
+
        sc->sc_flags &= ~(IWX_FLAG_SCANNING | IWX_FLAG_BGSCAN);
        ieee80211_end_scan(&ic->ic_if);
 }
@@ -7455,7 +7888,6 @@ iwx_start(struct ifnet *ifp)
        struct ieee80211_node *ni;
        struct ether_header *eh;
        struct mbuf *m;
-       int ac = EDCA_AC_BE; /* XXX */
 
        if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
                return;
@@ -7467,6 +7899,10 @@ iwx_start(struct ifnet *ifp)
                        break;
                }
 
+               /* Don't queue additional frames while flushing Tx queues. */
+               if (sc->sc_flags & IWX_FLAG_TXFLUSH)
+                       break;
+
                /* need to send management frames even if we're not RUNning */
                m = mq_dequeue(&ic->ic_mgtq);
                if (m) {
@@ -7500,7 +7936,7 @@ iwx_start(struct ifnet *ifp)
                if (ic->ic_rawbpf != NULL)
                        bpf_mtap(ic->ic_rawbpf, m, BPF_DIRECTION_OUT);
 #endif
-               if (iwx_tx(sc, m, ni, ac) != 0) {
+               if (iwx_tx(sc, m, ni) != 0) {
                        ieee80211_release_node(ic, ni);
                        ifp->if_oerrors++;
                        continue;
@@ -7535,6 +7971,7 @@ iwx_stop(struct ifnet *ifp)
        memset(sc->setkey_arg, 0, sizeof(sc->setkey_arg));
        sc->setkey_cur = sc->setkey_tail = sc->setkey_nkeys = 0;
        iwx_del_task(sc, systq, &sc->mac_ctxt_task);
+       iwx_del_task(sc, systq, &sc->endbgscan_task);
        KASSERT(sc->task_refs.refs >= 1);
        refcnt_finalize(&sc->task_refs, "iwxstop");
 
@@ -7561,13 +7998,14 @@ iwx_stop(struct ifnet *ifp)
        sc->sc_flags &= ~IWX_FLAG_TE_ACTIVE;
        sc->sc_flags &= ~IWX_FLAG_HW_ERR;
        sc->sc_flags &= ~IWX_FLAG_SHUTDOWN;
+       sc->sc_flags &= ~IWX_FLAG_TXFLUSH;
 
        sc->sc_rx_ba_sessions = 0;
-       sc->ba_start_tidmask = 0;
-       sc->ba_stop_tidmask = 0;
-       memset(sc->ba_ssn, 0, sizeof(sc->ba_ssn));
-       memset(sc->ba_winsize, 0, sizeof(sc->ba_winsize));
-       memset(sc->ba_timeout_val, 0, sizeof(sc->ba_timeout_val));
+       sc->ba_rx.start_tidmask = 0;
+       sc->ba_rx.stop_tidmask = 0;
+       memset(sc->aggqid, 0, sizeof(sc->aggqid));
+       sc->ba_tx.start_tidmask = 0;
+       sc->ba_tx.stop_tidmask = 0;
 
        sc->sc_newstate(ic, IEEE80211_S_INIT, -1);
 
@@ -8014,6 +8452,10 @@ iwx_rx_pkt(struct iwx_softc *sc, struct iwx_rx_data *d
                        iwx_rx_tx_cmd(sc, pkt, data);
                        break;
 
+               case IWX_BA_NOTIF:
+                       iwx_rx_compressed_ba(sc, pkt, data);
+                       break;
+
                case IWX_MISSED_BEACONS_NOTIFICATION:
                        iwx_rx_bmiss(sc, pkt, data);
                        break;
@@ -8344,7 +8786,7 @@ iwx_intr(void *arg)
 
                /* Dump driver status (TX and RX rings) while we're here. */
                printf("driver status:\n");
-               for (i = 0; i < IWX_MAX_QUEUES; i++) {
+               for (i = 0; i < nitems(sc->txq); i++) {
                        struct iwx_tx_ring *ring = &sc->txq[i];
                        printf("  tx ring %2d: qid=%-2d cur=%-3d "
                            "queued=%-3d\n",
@@ -8454,7 +8896,7 @@ iwx_intr_msix(void *arg)
 
                /* Dump driver status (TX and RX rings) while we're here. */
                printf("driver status:\n");
-               for (i = 0; i < IWX_MAX_QUEUES; i++) {
+               for (i = 0; i < nitems(sc->txq); i++) {
                        struct iwx_tx_ring *ring = &sc->txq[i];
                        printf("  tx ring %2d: qid=%-2d cur=%-3d "
                            "queued=%-3d\n",
@@ -8875,6 +9317,8 @@ iwx_attach(struct device *parent, struct device *self,
 
        /* Set device capabilities. */
        ic->ic_caps =
+           IEEE80211_C_QOS | IEEE80211_C_TX_AMPDU | /* A-MPDU */
+           IEEE80211_C_ADDBA_OFFLOAD | /* device sends ADDBA/DELBA frames */
            IEEE80211_C_WEP |           /* WEP */
            IEEE80211_C_RSN |           /* WPA/RSN */
            IEEE80211_C_SCANALL |       /* device scans all channels at once */
@@ -8934,6 +9378,7 @@ iwx_attach(struct device *parent, struct device *self,
        task_set(&sc->ba_task, iwx_ba_task, sc);
        task_set(&sc->setkey_task, iwx_setkey_task, sc);
        task_set(&sc->mac_ctxt_task, iwx_mac_ctxt_task, sc);
+       task_set(&sc->endbgscan_task, iwx_endbgscan_task, sc);
 
        ic->ic_node_alloc = iwx_node_alloc;
        ic->ic_bgscan_start = iwx_bgscan;
@@ -8948,10 +9393,8 @@ iwx_attach(struct device *parent, struct device *self,
        ic->ic_updateedca = iwx_updateedca;
        ic->ic_ampdu_rx_start = iwx_ampdu_rx_start;
        ic->ic_ampdu_rx_stop = iwx_ampdu_rx_stop;
-#ifdef notyet
        ic->ic_ampdu_tx_start = iwx_ampdu_tx_start;
-       ic->ic_ampdu_tx_stop = iwx_ampdu_tx_stop;
-#endif
+       ic->ic_ampdu_tx_stop = NULL;
        /*
         * We cannot read the MAC address without loading the
         * firmware from disk. Postpone until mountroot is done.
blob - 55422b8d6961837cf9281b56c8671b1aea163b57
blob + 85b1a96d54af4118a4c181e87f79fa85ac2bce8d
--- sys/dev/pci/if_iwxreg.h
+++ sys/dev/pci/if_iwxreg.h
@@ -1346,9 +1346,6 @@ struct iwx_gen3_bc_tbl {
        uint16_t tfd_offset[IWX_TFD_QUEUE_BC_SIZE_GEN3];
 } __packed;
 
-/* Maximum number of Tx queues. */
-#define IWX_MAX_QUEUES 31
-
 /**
  * DQA - Dynamic Queue Allocation -introduction
  *
@@ -1363,27 +1360,27 @@ struct iwx_gen3_bc_tbl {
  * some queues that are statically allocated:
  *     TXQ #0 - command queue
  *     TXQ #1 - aux frames
- *     TXQ #2 - P2P device frames
- *     TXQ #3 - P2P GO/SoftAP GCAST/BCAST frames
- *     TXQ #4 - BSS DATA frames queue
- *     TXQ #5-8 - non-QoS data, QoS no-data, and MGMT frames queue pool
- *     TXQ #9 - P2P GO/SoftAP probe responses
- *     TXQ #10-31 - QoS DATA frames queue pool (for Tx aggregation)
  */
 
 /* static DQA Tx queue numbers */
 #define IWX_DQA_CMD_QUEUE              0
 #define IWX_DQA_AUX_QUEUE              1
-#define IWX_DQA_P2P_DEVICE_QUEUE       2
-#define IWX_DQA_INJECT_MONITOR_QUEUE   2
-#define IWX_DQA_GCAST_QUEUE            3
-#define IWX_DQA_BSS_CLIENT_QUEUE       4
-#define IWX_DQA_MIN_MGMT_QUEUE         5
-#define IWX_DQA_MAX_MGMT_QUEUE         8
-#define IWX_DQA_AP_PROBE_RESP_QUEUE    9
-#define IWX_DQA_MIN_DATA_QUEUE         10
-#define IWX_DQA_MAX_DATA_QUEUE         31
 
+#define IWX_DQA_INJECT_MONITOR_QUEUE   2 /* used in monitor mode only */
+#define IWX_DQA_MGMT_QUEUE             2 /* default queue other modes */
+
+/* Reserve 8 DQA Tx queues for QoS data frames. */
+#define IWX_MAX_TID_COUNT      8
+#define IWX_FIRST_AGG_TX_QUEUE (IWX_DQA_MGMT_QUEUE + 1)
+#define IWX_LAST_AGG_TX_QUEUE  (IWX_FIRST_AGG_TX_QUEUE + IWX_MAX_TID_COUNT - 1)
+
+/**
+ * Max Tx window size is the max number of contiguous TFDs that the scheduler
+ * can keep track of at one time when creating block-ack chains of frames.
+ * Note that "64" matches the number of ack bits in a block-ack packet.
+ */
+#define IWX_FRAME_LIMIT        64
+
 #define IWX_TX_FIFO_BK 0
 #define IWX_TX_FIFO_BE 1
 #define IWX_TX_FIFO_VI 2
@@ -4732,8 +4729,8 @@ struct iwx_tlc_update_notif {
 /*
  * TID for non QoS frames - to be written in tid_tspec
  */
-#define IWX_MAX_TID_COUNT      8
 #define IWX_TID_NON_QOS        0
+#define IWX_TID_MGMT           15
 
 /*
  * Limits on the retransmissions - to be written in {data,rts}_retry_limit
@@ -5070,35 +5067,96 @@ struct iwx_tx_resp {
 } __packed; /* TX_RSP_API_S_VER_6 */
 
 /**
- * struct iwx_ba_notif - notifies about reception of BA
- * ( IWX_BA_NOTIF = 0xc5 )
- * @sta_addr_lo32: lower 32 bits of the MAC address
- * @sta_addr_hi16: upper 16 bits of the MAC address
+ * struct iwx_compressed_ba_tfd - progress of a TFD queue
+ * @q_num: TFD queue number
+ * @tfd_index: Index of first un-acked frame in the TFD queue
+ * @scd_queue: For debug only - the physical queue the TFD queue is bound to
+ * @tid: TID of the queue (0-7)
+ * @reserved: reserved for alignment
+ */
+struct iwx_compressed_ba_tfd {
+       uint16_t q_num;
+       uint16_t tfd_index;
+       uint8_t scd_queue;
+       uint8_t tid;
+       uint8_t reserved[2];
+} __packed; /* COMPRESSED_BA_TFD_API_S_VER_1 */
+
+/**
+ * struct iwx_compressed_ba_ratid - progress of a RA TID queue
+ * @q_num: RA TID queue number
+ * @tid: TID of the queue
+ * @ssn: BA window current SSN
+ */
+struct iwx_compressed_ba_ratid {
+       uint8_t q_num;
+       uint8_t tid;
+       uint16_t ssn;
+} __packed; /* COMPRESSED_BA_RATID_API_S_VER_1 */
+
+/*
+ * enum iwx_ba_resp_flags - TX aggregation status
+ * @IWX_MVM_BA_RESP_TX_AGG: generated due to BA
+ * @IWX_MVM_BA_RESP_TX_BAR: generated due to BA after BAR
+ * @IWX_MVM_BA_RESP_TX_AGG_FAIL: aggregation didn't receive BA
+ * @IWX_MVM_BA_RESP_TX_UNDERRUN: aggregation got underrun
+ * @IWX_MVM_BA_RESP_TX_BT_KILL: aggregation got BT-kill
+ * @IWX_MVM_BA_RESP_TX_DSP_TIMEOUT: aggregation didn't finish within the
+ *     expected time
+ */
+enum iwx_ba_resp_flags {
+       IWX_MVM_BA_RESP_TX_AGG,
+       IWX_MVM_BA_RESP_TX_BAR,
+       IWX_MVM_BA_RESP_TX_AGG_FAIL,
+       IWX_MVM_BA_RESP_TX_UNDERRUN,
+       IWX_MVM_BA_RESP_TX_BT_KILL,
+       IWX_MVM_BA_RESP_TX_DSP_TIMEOUT
+};
+
+/**
+ * struct iwx_compressed_ba_notif - notifies about reception of BA
+ * ( BA_NOTIF = 0xc5 )
+ * @flags: status flag, see the &iwx_ba_resp_flags
  * @sta_id: Index of recipient (BA-sending) station in fw's station table
- * @tid: tid of the session
- * @seq_ctl:
- * @bitmap: the bitmap of the BA notification as seen in the air
- * @scd_flow: the tx queue this BA relates to
- * @scd_ssn: the index of the last contiguously sent packet
- * @txed: number of Txed frames in this batch
- * @txed_2_done: number of Acked frames in this batch
+ * @reduced_txp: power reduced according to TPC. This is the actual value and
+ *     not a copy from the LQ command. Thus, if not the first rate was used
+ *     for Tx-ing then this value will be set to 0 by FW.
+ * @tlc_rate_info: TLC rate info, initial rate index, TLC table color
+ * @retry_cnt: retry count
+ * @query_byte_cnt: SCD query byte count
+ * @query_frame_cnt: SCD query frame count
+ * @txed: number of frames sent in the aggregation (all-TIDs)
+ * @done: number of frames that were Acked by the BA (all-TIDs)
+ * @reserved: reserved (for alignment)
+ * @wireless_time: Wireless-media time
+ * @tx_rate: the rate the aggregation was sent at
+ * @tfd_cnt: number of TFD-Q elements
+ * @ra_tid_cnt: number of RATID-Q elements
+ * @tfd: array of TFD queue status updates. See &iwx_compressed_ba_tfd
+ *     for details. Length in @tfd_cnt.
+ * @ra_tid: array of RA-TID queue status updates. For debug purposes only. See
+ *     &iwx_compressed_ba_ratid for more details. Length in @ra_tid_cnt.
  */
-struct iwx_ba_notif {
-       uint32_t sta_addr_lo32;
-       uint16_t sta_addr_hi16;
-       uint16_t reserved;
-
+struct iwx_compressed_ba_notif {
+       uint32_t flags;
        uint8_t sta_id;
-       uint8_t tid;
-       uint16_t seq_ctl;
-       uint64_t bitmap;
-       uint16_t scd_flow;
-       uint16_t scd_ssn;
-       uint8_t txed;
-       uint8_t txed_2_done;
-       uint16_t reserved1;
-} __packed;
+       uint8_t reduced_txp;
+       uint8_t tlc_rate_info;
+       uint8_t retry_cnt;
+       uint32_t query_byte_cnt;
+       uint16_t query_frame_cnt;
+       uint16_t txed;
+       uint16_t done;
+       uint16_t reserved;
+       uint32_t wireless_time;
+       uint32_t tx_rate;
+       uint16_t tfd_cnt;
+       uint16_t ra_tid_cnt;
+       struct iwx_compressed_ba_ratid ra_tid[0];
+       struct iwx_compressed_ba_tfd tfd[];
+} __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */
 
+
 struct iwx_beacon_notif {
        struct iwx_tx_resp_v3 beacon_notify_hdr;
        uint64_t tsf;
@@ -5136,7 +5194,34 @@ struct iwx_tx_path_flush_cmd {
        uint16_t reserved;
 } __packed; /* TX_PATH_FLUSH_CMD_API_S_VER_2 */
 
+#define IWX_TX_FLUSH_QUEUE_RSP 16
+
 /**
+ * struct iwx_flush_queue_info - virtual flush queue info
+ * @queue_num: virtual queue id
+ * @read_before_flush: read pointer before flush
+ * @read_after_flush: read pointer after flush
+ */
+struct iwx_flush_queue_info {
+       uint16_t tid;
+       uint16_t queue_num;
+       uint16_t read_before_flush;
+       uint16_t read_after_flush;
+} __packed; /* TFDQ_FLUSH_INFO_API_S_VER_1 */
+
+/**
+ * struct iwx_tx_path_flush_cmd_rsp -- queue/FIFO flush command response
+ * @num_flushed_queues: number of queues in queues array
+ * @queues: all flushed queues
+ */
+struct iwx_tx_path_flush_cmd_rsp {
+       uint16_t sta_id;
+       uint16_t num_flushed_queues;
+       struct iwx_flush_queue_info queues[IWX_TX_FLUSH_QUEUE_RSP];
+} __packed; /* TX_PATH_FLUSH_CMD_RSP_API_S_VER_1 */
+
+
+/**
  * iwx_get_scd_ssn - returns the SSN of the SCD
  * @tx_resp: the Tx response from the fw (agg or non-agg)
  *
blob - 928b4d9ec79fed1937e4e147b905cf0da7c36750
blob + dcd53368e2dda1cb5685ead554fe050b0befbb02
--- sys/dev/pci/if_iwxvar.h
+++ sys/dev/pci/if_iwxvar.h
@@ -228,9 +228,6 @@ struct iwx_dma_info {
 #define IWX_TX_RING_LOMARK     192
 #define IWX_TX_RING_HIMARK     224
 
-/* For aggregation queues, index must be aligned to frame sequence number. */
-#define IWX_AGG_SSN_TO_TXQ_IDX(x)      ((x) & (IWX_TX_RING_COUNT - 1))
-
 struct iwx_tx_data {
        bus_dmamap_t    map;
        bus_addr_t      cmd_paddr;
@@ -249,6 +246,7 @@ struct iwx_tx_ring {
        int                     queued;
        int                     cur;
        int                     tail;
+       int                     tid;
 };
 
 #define IWX_RX_MQ_RING_COUNT   512
@@ -281,6 +279,7 @@ struct iwx_rx_ring {
 #define IWX_FLAG_HW_ERR                0x80    /* hardware error occurred */
 #define IWX_FLAG_SHUTDOWN      0x100   /* shutting down; new tasks forbidden */
 #define IWX_FLAG_BGSCAN                0x200   /* background scan in progress */
+#define IWX_FLAG_TXFLUSH       0x400   /* Tx queue flushing in progress */
 
 struct iwx_ucode_status {
        uint32_t uc_lmac_error_event_table[2];
@@ -447,6 +446,11 @@ struct iwx_setkey_task_arg {
        struct ieee80211_key *k;
 };
 
+struct iwx_ba_task_data {
+       uint32_t                start_tidmask;
+       uint32_t                stop_tidmask;
+};
+
 struct iwx_softc {
        struct device sc_dev;
        struct ieee80211com sc_ic;
@@ -461,11 +465,8 @@ struct iwx_softc {
 
        /* Task for firmware BlockAck setup/teardown and its arguments. */
        struct task             ba_task;
-       uint32_t                ba_start_tidmask;
-       uint32_t                ba_stop_tidmask;
-       uint16_t                ba_ssn[IWX_MAX_TID_COUNT];
-       uint16_t                ba_winsize[IWX_MAX_TID_COUNT];
-       int                     ba_timeout_val[IWX_MAX_TID_COUNT];
+       struct iwx_ba_task_data ba_rx;
+       struct iwx_ba_task_data ba_tx;
 
        /* Task for setting encryption keys and its arguments. */
        struct task             setkey_task;
@@ -484,6 +485,9 @@ struct iwx_softc {
        /* Task for ERP/HT prot/slot-time/EDCA updates. */
        struct task             mac_ctxt_task;
 
+       /* Task which gracefully ends a background scan. */
+       struct task             endbgscan_task;
+
        bus_space_tag_t sc_st;
        bus_space_handle_t sc_sh;
        bus_size_t sc_sz;
@@ -498,9 +502,11 @@ struct iwx_softc {
        uint32_t                        sched_base;
 
        /* TX/RX rings. */
-       struct iwx_tx_ring txq[IWX_MAX_QUEUES];
+       struct iwx_tx_ring txq[IWX_LAST_AGG_TX_QUEUE];
        struct iwx_rx_ring rxq;
        int qfullmsk;
+       int qenablemsk;
+       int aggqid[IEEE80211_NUM_TID];
 
        int sc_sf_state;
 
blob - bf7abf058b86c4b0ad402a22c2e3f3c35e81788e
blob + b3b30f1f98f689c6d066cff04b07b938fde0cc68
--- sys/net80211/ieee80211_proto.c
+++ sys/net80211/ieee80211_proto.c
@@ -700,6 +700,18 @@ ieee80211_addba_request(struct ieee80211com *ic, struc
                /* immediate BA */
                ba->ba_params |= IEEE80211_ADDBA_BA_POLICY;
 
+       if ((ic->ic_caps & IEEE80211_C_ADDBA_OFFLOAD) &&
+           ic->ic_ampdu_tx_start != NULL) {
+               int err = ic->ic_ampdu_tx_start(ic, ni, tid);
+               if (err && err != EBUSY) {
+                       /* driver failed to setup, rollback */
+                       ieee80211_addba_resp_refuse(ic, ni, tid,
+                           IEEE80211_STATUS_UNSPECIFIED);
+               } else if (err == 0)
+                       ieee80211_addba_resp_accept(ic, ni, tid);
+               return err; /* The device will send an ADDBA frame. */
+       }
+
        timeout_add_sec(&ba->ba_to, 1); /* dot11ADDBAResponseTimeout */
        IEEE80211_SEND_ACTION(ic, ni, IEEE80211_CATEG_BA,
            IEEE80211_ACTION_ADDBA_REQ, tid);
@@ -948,6 +960,13 @@ ieee80211_stop_ampdu_tx(struct ieee80211com *ic, struc
                struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
                if (ba->ba_state != IEEE80211_BA_AGREED)
                        continue;
+
+               if (ic->ic_caps & IEEE80211_C_ADDBA_OFFLOAD) {
+                       if (ic->ic_ampdu_tx_stop != NULL)
+                               ic->ic_ampdu_tx_stop(ic, ni, tid);
+                       continue; /* Don't change ba->ba_state! */
+               }
+
                ieee80211_delba_request(ic, ni,
                    mgt == -1 ? 0 : IEEE80211_REASON_AUTH_LEAVE, 1, tid);
        }
blob - 334439887ffe54232e2ccc488b975f1cda5a26a3
blob + 0ca42aaf5b0af1be78d69cc692c02bc9a35b123a
--- sys/net80211/ieee80211_var.h
+++ sys/net80211/ieee80211_var.h
@@ -432,6 +432,7 @@ struct ieee80211_ess {
 #define IEEE80211_C_RAWCTL     0x00004000      /* CAPABILITY: raw ctl */
 #define IEEE80211_C_SCANALLBAND        0x00008000      /* CAPABILITY: scan all bands */
 #define IEEE80211_C_TX_AMPDU   0x00010000      /* CAPABILITY: send A-MPDU */
+#define IEEE80211_C_ADDBA_OFFLOAD 0x00020000   /* CAPABILITY: ADDBA offload */
 
 /* flags for ieee80211_fix_rate() */
 #define        IEEE80211_F_DOSORT      0x00000001      /* sort rate list */

Reply via email to