On Sat, Sep 11, 2021 at 02:04:32PM +0200, Stefan Sperling wrote:
> On Fri, Sep 10, 2021 at 06:49:49PM +0200, Stefan Sperling wrote:
> > Here is another attempt at adding Tx aggregation to iwx(4).
> > This patch is based on the latest state in CVS (if_iwx.c r1.107, which
> > I have committed a minute ago). Sync your tree before applying this patch.
> > 
> > Compared to previous iterations of this patch, I have fixed bugs which
> > caused fatal firmware errors and which made traffic stall after roaming.
> > 
> > This patch could still make 7.0 release if it gets sufficient test coverage.
> > Please run with this and report any regressions. Thanks!
> > 
> > So far, tested by me on AX200 and AX201 against a Pepwave 11ac AP.
> > I have so far not seen any fatal firmware errors, and roaming between 2GHz
> > and 5GHz channels offered by the same AP seems to work reliably.
> > Throughput goes up to 100 Mbit/s max.
> 
> The previous version had a problem where it did not take frames
> off the Tx ring when they were done. It is possible that this
> could lead to memory corruption (seen by mlarkin).
> 
> Please run this updated patch instead.
> 
> And please enable 'ifconfig iwx0 debug' while testing this patch.
> Problem reports will be a lot more useful with debug enabled :)

I have extracted two important fixes from this patch which have now
been committed to -current.
Below is a newly rebased diff which contains the remaining changes.

To prevent merge conflicts when updating, I recommend removing the
changes made by the previous patch, updating the source tree, and then
applying the patch below on top of the clean, updated tree.

diff refs/heads/master refs/heads/iwx-txagg
blob - 12f6f9eefd77c4d52a3c9d738326774f949fa1be
blob + e96fe29fe839fc15abcdc28eedb67bd6a85aef23
--- sys/dev/pci/if_iwx.c
+++ sys/dev/pci/if_iwx.c
@@ -318,18 +318,16 @@ int       iwx_ampdu_rx_start(struct ieee80211com *, struct i
            uint8_t);
 void   iwx_ampdu_rx_stop(struct ieee80211com *, struct ieee80211_node *,
            uint8_t);
+int    iwx_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
+           uint8_t);
 void   iwx_rx_ba_session_expired(void *);
 void   iwx_rx_bar_frame_release(struct iwx_softc *, struct iwx_rx_packet *,
            struct iwx_rx_data *, struct mbuf_list *);
 void   iwx_reorder_timer_expired(void *);
 void   iwx_sta_rx_agg(struct iwx_softc *, struct ieee80211_node *, uint8_t,
            uint16_t, uint16_t, int, int);
-#ifdef notyet
-int    iwx_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
+void   iwx_sta_tx_agg_start(struct iwx_softc *, struct ieee80211_node *,
            uint8_t);
-void   iwx_ampdu_tx_stop(struct ieee80211com *, struct ieee80211_node *,
-           uint8_t);
-#endif
 void   iwx_ba_task(void *);
 
 int    iwx_set_mac_addr_from_csr(struct iwx_softc *, struct iwx_nvm_data *);
@@ -355,10 +353,13 @@ int       iwx_ccmp_decap(struct iwx_softc *, struct mbuf *,
            struct ieee80211_node *, struct ieee80211_rxinfo *);
 void   iwx_rx_frame(struct iwx_softc *, struct mbuf *, int, uint32_t, int, int,
            uint32_t, struct ieee80211_rxinfo *, struct mbuf_list *);
-void   iwx_rx_tx_cmd_single(struct iwx_softc *, struct iwx_rx_packet *,
-           struct iwx_node *);
+void   iwx_clear_tx_desc(struct iwx_softc *, struct iwx_tx_ring *, int);
+void   iwx_txd_done(struct iwx_softc *, struct iwx_tx_data *);
+void   iwx_tx_ba_move_window(struct ieee80211com *, int, struct mbuf *);
+void   iwx_txq_advance(struct iwx_softc *, struct iwx_tx_ring *, int);
 void   iwx_rx_tx_cmd(struct iwx_softc *, struct iwx_rx_packet *,
            struct iwx_rx_data *);
+void   iwx_clear_oactive(struct iwx_softc *, struct iwx_tx_ring *);
 void   iwx_rx_bmiss(struct iwx_softc *, struct iwx_rx_packet *,
            struct iwx_rx_data *);
 int    iwx_binding_cmd(struct iwx_softc *, struct iwx_node *, uint32_t);
@@ -382,8 +383,11 @@ void       iwx_cmd_done(struct iwx_softc *, int, int, int);
 const struct iwx_rate *iwx_tx_fill_cmd(struct iwx_softc *, struct iwx_node *,
            struct ieee80211_frame *, struct iwx_tx_cmd_gen2 *);
 void   iwx_tx_update_byte_tbl(struct iwx_tx_ring *, int, uint16_t, uint16_t);
-int    iwx_tx(struct iwx_softc *, struct mbuf *, struct ieee80211_node *, int);
-int    iwx_flush_tx_path(struct iwx_softc *);
+int    iwx_tx(struct iwx_softc *, struct mbuf *, struct ieee80211_node *);
+int    iwx_flush_sta_tids(struct iwx_softc *, int, uint16_t);
+int    iwx_wait_tx_queues_empty(struct iwx_softc *);
+int    iwx_drain_sta(struct iwx_softc *sc, struct iwx_node *, int);
+int    iwx_flush_sta(struct iwx_softc *, struct iwx_node *);
 int    iwx_beacon_filter_send_cmd(struct iwx_softc *,
            struct iwx_beacon_filter_cmd *);
 int    iwx_update_beacon_abort(struct iwx_softc *, struct iwx_node *, int);
@@ -396,6 +400,7 @@ int iwx_disable_beacon_filter(struct iwx_softc *);
 int    iwx_add_sta_cmd(struct iwx_softc *, struct iwx_node *, int);
 int    iwx_add_aux_sta(struct iwx_softc *);
 int    iwx_rm_sta_cmd(struct iwx_softc *, struct iwx_node *);
+int    iwx_rm_sta(struct iwx_softc *, struct iwx_node *);
 int    iwx_fill_probe_req(struct iwx_softc *, struct iwx_scan_probe_req *);
 int    iwx_config_umac_scan_reduced(struct iwx_softc *);
 int    iwx_config_umac_scan(struct iwx_softc *);
@@ -425,6 +430,7 @@ int iwx_scan(struct iwx_softc *);
 int    iwx_bgscan(struct ieee80211com *);
 int    iwx_umac_scan_abort(struct iwx_softc *);
 int    iwx_scan_abort(struct iwx_softc *);
+int    iwx_enable_mgmt_queue(struct iwx_softc *);
 int    iwx_rs_rval2idx(uint8_t);
 uint16_t iwx_rs_ht_rates(struct iwx_softc *, struct ieee80211_node *, int);
 int    iwx_rs_init(struct iwx_softc *, struct iwx_node *);
@@ -1776,20 +1782,20 @@ iwx_alloc_tx_ring(struct iwx_softc *sc, struct iwx_tx_
        ring->desc = ring->desc_dma.vaddr;
 
        /*
-        * There is no need to allocate DMA buffers for unused rings.
-        * The hardware supports up to 31 Tx rings which is more
+        * The hardware supports up to 512 Tx rings which is more
         * than we currently need.
         *
-        * In DQA mode we use 1 command queue + 4 DQA mgmt/data queues.
-        * The command is queue 0 (sc->txq[0]), and 4 mgmt/data frame queues
-        * are sc->tqx[ac + IWX_DQA_AUX_QUEUE + 1], i.e. sc->txq[2:5],
-        * in order to provide one queue per EDCA category.
+        * In DQA mode we use 1 command queue + 1 default queue for
+        * management, control, and non-QoS data frames.
+        * The command queue is sc->txq[0], our default queue is sc->txq[1].
         *
-        * Tx aggregation will require additional queues (one queue per TID
-        * for which aggregation is enabled) but we do not implement this yet.
+        * Tx aggregation requires additional queues, one queue per TID for
+        * which aggregation is enabled. We map TID 0-7 to sc->txq[2:9].
+        * Firmware may assign its own internal IDs for these queues
+        * depending on which TID gets aggregation enabled first.
+        * The driver maintains a table mapping driver-side queue IDs
+        * to firmware-side queue IDs.
         */
-       if (qid > IWX_DQA_MIN_MGMT_QUEUE)
-               return 0;
 
        err = iwx_dma_contig_alloc(sc->sc_dmat, &ring->bc_tbl,
            sizeof(struct iwx_agn_scd_bc_tbl), 0);
@@ -1863,9 +1869,17 @@ iwx_reset_tx_ring(struct iwx_softc *sc, struct iwx_tx_
        bus_dmamap_sync(sc->sc_dmat, ring->desc_dma.map, 0,
            ring->desc_dma.size, BUS_DMASYNC_PREWRITE);
        sc->qfullmsk &= ~(1 << ring->qid);
+       sc->qenablemsk &= ~(1 << ring->qid);
+       for (i = 0; i < nitems(sc->aggqid); i++) {
+               if (sc->aggqid[i] == ring->qid) {
+                       sc->aggqid[i] = 0;
+                       break;
+               }
+       }
        ring->queued = 0;
        ring->cur = 0;
        ring->tail = 0;
+       ring->tid = 0;
 }
 
 void
@@ -2372,15 +2386,23 @@ iwx_start_hw(struct iwx_softc *sc)
 void
 iwx_stop_device(struct iwx_softc *sc)
 {
-       int qid;
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ieee80211_node *ni = ic->ic_bss;
+       int i;
 
        iwx_disable_interrupts(sc);
        sc->sc_flags &= ~IWX_FLAG_USE_ICT;
 
        iwx_disable_rx_dma(sc);
        iwx_reset_rx_ring(sc, &sc->rxq);
-       for (qid = 0; qid < nitems(sc->txq); qid++)
-               iwx_reset_tx_ring(sc, &sc->txq[qid]);
+       for (i = 0; i < nitems(sc->txq); i++)
+               iwx_reset_tx_ring(sc, &sc->txq[i]);
+       for (i = 0; i < IEEE80211_NUM_TID; i++) {
+               struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[i];
+               if (ba->ba_state != IEEE80211_BA_AGREED)
+                       continue;
+               ieee80211_delba_request(ic, ni, 0, 1, i);
+       }
 
        /* Make sure (redundant) we've released our request to stay awake */
        IWX_CLRBITS(sc, IWX_CSR_GP_CNTRL,
@@ -2487,6 +2509,18 @@ iwx_nic_init(struct iwx_softc *sc)
        return 0;
 }
 
+/* Map a TID to an ieee80211_edca_ac category. */
+const uint8_t iwx_tid_to_ac[IWX_MAX_TID_COUNT] = {
+       EDCA_AC_BE,
+       EDCA_AC_BK,
+       EDCA_AC_BK,
+       EDCA_AC_BE,
+       EDCA_AC_VI,
+       EDCA_AC_VI,
+       EDCA_AC_VO,
+       EDCA_AC_VO,
+};
+
 /* Map ieee80211_edca_ac categories to firmware Tx FIFO. */
 const uint8_t iwx_ac_to_tx_fifo[] = {
        IWX_GEN2_EDCA_TX_FIFO_BE,
@@ -2559,6 +2593,9 @@ iwx_enable_txq(struct iwx_softc *sc, int sta_id, int q
                err = EIO;
                goto out;
        }
+
+       sc->qenablemsk |= (1 << qid);
+       ring->tid = tid;
 out:
        iwx_free_resp(sc, &hcmd);
        return err;
@@ -3146,6 +3183,60 @@ iwx_updateedca(struct ieee80211com *ic)
 }
 
 void
+iwx_sta_tx_agg_start(struct iwx_softc *sc, struct ieee80211_node *ni,
+    uint8_t tid)
+{
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ieee80211_tx_ba *ba;
+       int err, qid;
+       struct iwx_tx_ring *ring;
+
+       /* Ensure we can map this TID to an aggregation queue. */
+       if (tid >= IWX_MAX_TID_COUNT)
+               return;
+
+       ba = &ni->ni_tx_ba[tid];
+       if (ba->ba_state != IEEE80211_BA_REQUESTED)
+               return;
+
+       qid = sc->aggqid[tid];
+       if (qid == 0) {
+               /* Firmware should pick the next unused Tx queue. */
+               qid = fls(sc->qenablemsk);
+       }
+
+       /*
+        * Simply enable the queue.
+        * Firmware handles Tx BA session setup and teardown.
+        */
+       if ((sc->qenablemsk & (1 << qid)) == 0) {
+               if (!iwx_nic_lock(sc)) {
+                       ieee80211_addba_resp_refuse(ic, ni, tid,
+                           IEEE80211_STATUS_UNSPECIFIED);
+                       return;
+               }
+               err = iwx_enable_txq(sc, IWX_STATION_ID, qid, tid,
+                   IWX_TX_RING_COUNT);
+               iwx_nic_unlock(sc);
+               if (err) {
+                       printf("%s: could not enable Tx queue %d "
+                           "(error %d)\n", DEVNAME(sc), qid, err);
+                       ieee80211_addba_resp_refuse(ic, ni, tid,
+                           IEEE80211_STATUS_UNSPECIFIED);
+                       return;
+               }
+
+               ba->ba_winstart = 0;
+       } else
+               ba->ba_winstart = ni->ni_qos_txseqs[tid];
+
+       ring = &sc->txq[qid];
+       ba->ba_timeout_val = 0;
+       ieee80211_addba_resp_accept(ic, ni, tid);
+       sc->aggqid[tid] = qid;
+}
+
+void
 iwx_ba_task(void *arg)
 {
        struct iwx_softc *sc = arg;
@@ -3157,16 +3248,26 @@ iwx_ba_task(void *arg)
        for (tid = 0; tid < IWX_MAX_TID_COUNT; tid++) {
                if (sc->sc_flags & IWX_FLAG_SHUTDOWN)
                        break;
-               if (sc->ba_start_tidmask & (1 << tid)) {
-                       iwx_sta_rx_agg(sc, ni, tid, sc->ba_ssn[tid],
-                           sc->ba_winsize[tid], sc->ba_timeout_val[tid], 1);
-                       sc->ba_start_tidmask &= ~(1 << tid);
-               } else if (sc->ba_stop_tidmask & (1 << tid)) {
+               if (sc->ba_rx.start_tidmask & (1 << tid)) {
+                       struct ieee80211_rx_ba *ba = &ni->ni_rx_ba[tid];
+                       iwx_sta_rx_agg(sc, ni, tid, ba->ba_winstart,
+                           ba->ba_winsize, ba->ba_timeout_val, 1);
+                       sc->ba_rx.start_tidmask &= ~(1 << tid);
+               } else if (sc->ba_rx.stop_tidmask & (1 << tid)) {
                        iwx_sta_rx_agg(sc, ni, tid, 0, 0, 0, 0);
-                       sc->ba_stop_tidmask &= ~(1 << tid);
+                       sc->ba_rx.stop_tidmask &= ~(1 << tid);
                }
        }
 
+       for (tid = 0; tid < IWX_MAX_TID_COUNT; tid++) {
+               if (sc->sc_flags & IWX_FLAG_SHUTDOWN)
+                       break;
+               if (sc->ba_tx.start_tidmask & (1 << tid)) {
+                       iwx_sta_tx_agg_start(sc, ni, tid);
+                       sc->ba_tx.start_tidmask &= ~(1 << tid);
+               }
+       }
+
        refcnt_rele_wake(&sc->task_refs);
        splx(s);
 }
@@ -3179,17 +3280,16 @@ int
 iwx_ampdu_rx_start(struct ieee80211com *ic, struct ieee80211_node *ni,
     uint8_t tid)
 {
-       struct ieee80211_rx_ba *ba = &ni->ni_rx_ba[tid];
        struct iwx_softc *sc = IC2IFP(ic)->if_softc;
 
        if (sc->sc_rx_ba_sessions >= IWX_MAX_RX_BA_SESSIONS ||
-           tid > IWX_MAX_TID_COUNT || (sc->ba_start_tidmask & (1 << tid)))
+           tid > IWX_MAX_TID_COUNT)
                return ENOSPC;
 
-       sc->ba_start_tidmask |= (1 << tid);
-       sc->ba_ssn[tid] = ba->ba_winstart;
-       sc->ba_winsize[tid] = ba->ba_winsize;
-       sc->ba_timeout_val[tid] = ba->ba_timeout_val;
+       if (sc->ba_rx.start_tidmask & (1 << tid))
+               return EBUSY;
+
+       sc->ba_rx.start_tidmask |= (1 << tid);
        iwx_add_task(sc, systq, &sc->ba_task);
 
        return EBUSY;
@@ -3205,13 +3305,49 @@ iwx_ampdu_rx_stop(struct ieee80211com *ic, struct ieee
 {
        struct iwx_softc *sc = IC2IFP(ic)->if_softc;
 
-       if (tid > IWX_MAX_TID_COUNT || sc->ba_stop_tidmask & (1 << tid))
+       if (tid > IWX_MAX_TID_COUNT || sc->ba_rx.stop_tidmask & (1 << tid))
                return;
 
-       sc->ba_stop_tidmask = (1 << tid);
+       sc->ba_rx.stop_tidmask = (1 << tid);
        iwx_add_task(sc, systq, &sc->ba_task);
 }
 
+int
+iwx_ampdu_tx_start(struct ieee80211com *ic, struct ieee80211_node *ni,
+    uint8_t tid)
+{
+       struct iwx_softc *sc = IC2IFP(ic)->if_softc;
+       struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
+
+       /*
+        * Require a firmware version which uses an internal AUX queue.
+        * The value of IWX_FIRST_AGG_TX_QUEUE would be incorrect otherwise.
+        */
+       if (sc->first_data_qid != IWX_DQA_CMD_QUEUE + 1)
+               return ENOTSUP;
+
+       /* Ensure we can map this TID to an aggregation queue. */
+       if (tid >= IWX_MAX_TID_COUNT)
+               return EINVAL;
+
+       /* We only support a fixed Tx aggregation window size, for now. */
+       if (ba->ba_winsize != IWX_FRAME_LIMIT)
+               return ENOTSUP;
+
+       /* Is firmware already using an agg queue with this TID? */
+       if (sc->aggqid[tid] != 0)
+               return ENOSPC;
+
+       /* Are we already processing an ADDBA request? */
+       if (sc->ba_tx.start_tidmask & (1 << tid))
+               return EBUSY;
+
+       sc->ba_tx.start_tidmask |= (1 << tid);
+       iwx_add_task(sc, systq, &sc->ba_task);
+
+       return EBUSY;
+}
+
 /* Read the mac address from WFMP registers. */
 int
 iwx_set_mac_addr_from_csr(struct iwx_softc *sc, struct iwx_nvm_data *data)
@@ -4368,25 +4504,6 @@ iwx_rx_mpdu_mq(struct iwx_softc *sc, struct mbuf *m, v
 }
 
 void
-iwx_rx_tx_cmd_single(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
-    struct iwx_node *in)
-{
-       struct ieee80211com *ic = &sc->sc_ic;
-       struct ifnet *ifp = IC2IFP(ic);
-       struct iwx_tx_resp *tx_resp = (void *)pkt->data;
-       int status = le16toh(tx_resp->status.status) & IWX_TX_STATUS_MSK;
-       int txfail;
-       
-       KASSERT(tx_resp->frame_count == 1);
-
-       txfail = (status != IWX_TX_STATUS_SUCCESS &&
-           status != IWX_TX_STATUS_DIRECT_DONE);
-
-       if (txfail)
-               ifp->if_oerrors++;
-}
- 
-void
 iwx_clear_tx_desc(struct iwx_softc *sc, struct iwx_tx_ring *ring, int idx)
 {
        struct iwx_tfh_tfd *desc = &ring->desc[idx];
@@ -4422,16 +4539,57 @@ iwx_txd_done(struct iwx_softc *sc, struct iwx_tx_data 
 }
 
 void
+iwx_tx_ba_move_window(struct ieee80211com *ic, int tid, struct mbuf *m)
+{
+       struct ieee80211_node *ni = ic->ic_bss;
+       struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
+       struct ieee80211_frame *wh;
+       uint16_t seq;
+
+       if (ba->ba_state != IEEE80211_BA_AGREED)
+               return;
+
+       wh = mtod(m, struct ieee80211_frame *);
+       if (!ieee80211_has_seq(wh))
+               return;
+
+       seq = letoh16(*(u_int16_t *)wh->i_seq) >> IEEE80211_SEQ_SEQ_SHIFT;
+       ba->ba_winstart = seq;
+}
+
+void
+iwx_txq_advance(struct iwx_softc *sc, struct iwx_tx_ring *ring, int idx)
+{
+       struct iwx_tx_data *txd;
+       struct ieee80211com *ic = &sc->sc_ic;
+
+       while (ring->tail != idx) {
+               txd = &ring->data[ring->tail];
+               if (txd->m != NULL) {
+                       iwx_clear_tx_desc(sc, ring, ring->tail);
+                       iwx_tx_update_byte_tbl(ring, ring->tail, 0, 0);
+                       iwx_txd_done(sc, txd);
+                       ring->queued--;
+               }
+               ring->tail = (ring->tail + 1) % IWX_TX_RING_COUNT;
+       }
+
+       if (ring->qid >= IWX_FIRST_AGG_TX_QUEUE) {
+               txd = &ring->data[idx];
+               if (txd->m != NULL)
+                       iwx_tx_ba_move_window(ic, ring->tid, txd->m);
+       }
+}
+
+void
 iwx_rx_tx_cmd(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
     struct iwx_rx_data *data)
 {
        struct ieee80211com *ic = &sc->sc_ic;
        struct ifnet *ifp = IC2IFP(ic);
        struct iwx_cmd_header *cmd_hdr = &pkt->hdr;
-       int idx = cmd_hdr->idx;
-       int qid = cmd_hdr->qid;
+       int qid = cmd_hdr->qid, status, txfail;
        struct iwx_tx_ring *ring = &sc->txq[qid];
-       struct iwx_tx_data *txd;
        struct iwx_tx_resp *tx_resp = (void *)pkt->data;
        uint32_t ssn;
        uint32_t len = iwx_rx_packet_len(pkt);
@@ -4441,33 +4599,43 @@ iwx_rx_tx_cmd(struct iwx_softc *sc, struct iwx_rx_pack
 
        sc->sc_tx_timer = 0;
 
-       txd = &ring->data[idx];
-       if (txd->m == NULL)
+       /* Sanity checks. */
+       if (sizeof(*tx_resp) > len)
                return;
-
-       if (sizeof(*tx_resp) + sizeof(ssn) +
+       if (qid < IWX_FIRST_AGG_TX_QUEUE && tx_resp->frame_count > 1)
+               return;
+       if (qid >= IWX_FIRST_AGG_TX_QUEUE && sizeof(*tx_resp) + sizeof(ssn) +
            tx_resp->frame_count * sizeof(tx_resp->status) > len)
                return;
 
-       iwx_rx_tx_cmd_single(sc, pkt, txd->in);
+       if (tx_resp->frame_count > 1) /* A-MPDU */
+               return;
 
+       status = le16toh(tx_resp->status.status) & IWX_TX_STATUS_MSK;
+       txfail = (status != IWX_TX_STATUS_SUCCESS &&
+           status != IWX_TX_STATUS_DIRECT_DONE);
+
+       if (txfail)
+               ifp->if_oerrors++;
+
        /*
-        * Even though this is not an agg queue, we must only free
-        * frames before the firmware's starting sequence number.
+        * On hardware supported by iwx(4) the SSN counter is only
+        * 8 bit and corresponds to a Tx ring index rather than a
+        * sequence number. Frames up to this index (non-inclusive)
+        * can now be freed.
         */
        memcpy(&ssn, &tx_resp->status + tx_resp->frame_count, sizeof(ssn));
-       ssn = le32toh(ssn) & 0xfff;
-       while (ring->tail != IWX_AGG_SSN_TO_TXQ_IDX(ssn)) {
-               txd = &ring->data[ring->tail];
-               if (txd->m != NULL) {
-                       iwx_txd_done(sc, txd);
-                       iwx_clear_tx_desc(sc, ring, ring->tail);
-                       iwx_tx_update_byte_tbl(ring, ring->tail, 0, 0);
-                       ring->queued--;
-               }
-               ring->tail = (ring->tail + 1) % IWX_TX_RING_COUNT;
-       }
+       ssn = le32toh(ssn) & 0xff;
+       iwx_txq_advance(sc, ring, ssn);
+       iwx_clear_oactive(sc, ring);
+}
 
+void
+iwx_clear_oactive(struct iwx_softc *sc, struct iwx_tx_ring *ring)
+{
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ifnet *ifp = IC2IFP(ic);
+
        if (ring->queued < IWX_TX_RING_LOMARK) {
                sc->qfullmsk &= ~(1 << ring->qid);
                if (sc->qfullmsk == 0 && ifq_is_oactive(&ifp->if_snd)) {
@@ -4483,6 +4651,64 @@ iwx_rx_tx_cmd(struct iwx_softc *sc, struct iwx_rx_pack
 }
 
 void
+iwx_rx_compressed_ba(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
+    struct iwx_rx_data *data)
+{
+       struct iwx_compressed_ba_notif *ba_res = (void *)pkt->data;
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ieee80211_node *ni;
+       struct ieee80211_tx_ba *ba;
+       struct iwx_node *in;
+       struct iwx_tx_ring *ring;
+       uint16_t i, tfd_cnt, ra_tid_cnt, idx;
+       int qid;
+
+       if (ic->ic_state != IEEE80211_S_RUN)
+               return;
+
+       if (iwx_rx_packet_payload_len(pkt) < sizeof(*ba_res))
+               return;
+
+       if (ba_res->sta_id != IWX_STATION_ID)
+               return;
+
+       ni = ic->ic_bss;
+       in = (void *)ni;
+
+       tfd_cnt = le16toh(ba_res->tfd_cnt);
+       ra_tid_cnt = le16toh(ba_res->ra_tid_cnt);
+       if (!tfd_cnt || iwx_rx_packet_payload_len(pkt) < (sizeof(*ba_res) +
+           sizeof(ba_res->ra_tid[0]) * ra_tid_cnt +
+           sizeof(ba_res->tfd[0]) * tfd_cnt))
+               return;
+
+       for (i = 0; i < tfd_cnt; i++) {
+               struct iwx_compressed_ba_tfd *ba_tfd = &ba_res->tfd[i];
+               uint8_t tid;
+
+               tid = ba_tfd->tid;
+               if (tid >= nitems(sc->aggqid))
+                       continue;
+
+               qid = sc->aggqid[tid];
+               if (qid != htole16(ba_tfd->q_num))
+                       continue;
+
+               ring = &sc->txq[qid];
+
+               ba = &ni->ni_tx_ba[tid];
+               if (ba->ba_state != IEEE80211_BA_AGREED)
+                       continue;
+
+               idx = le16toh(ba_tfd->tfd_index);
+               if (idx >= IWX_TX_RING_COUNT)
+                       continue;
+               iwx_txq_advance(sc, ring, idx);
+               iwx_clear_oactive(sc, ring);
+       }
+}
+
+void
 iwx_rx_bmiss(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
     struct iwx_rx_data *data)
 {
@@ -5078,7 +5304,7 @@ iwx_tx_update_byte_tbl(struct iwx_tx_ring *txq, int id
 }
 
 int
-iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni, int ac)
+iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
 {
        struct ieee80211com *ic = &sc->sc_ic;
        struct iwx_node *in = (void *)ni;
@@ -5094,25 +5320,36 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ie
        u_int hdrlen;
        bus_dma_segment_t *seg;
        uint16_t num_tbs;
-       uint8_t type;
-       int i, totlen, err, pad;
+       uint8_t type, subtype;
+       int i, totlen, err, pad, qid;
 
        wh = mtod(m, struct ieee80211_frame *);
-       hdrlen = ieee80211_get_hdrlen(wh);
        type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
+       subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
+       if (type == IEEE80211_FC0_TYPE_CTL)
+               hdrlen = sizeof(struct ieee80211_frame_min);
+       else
+               hdrlen = ieee80211_get_hdrlen(wh);
 
-       /*
-        * Map EDCA categories to Tx data queues.
-        *
-        * We use static data queue assignments even in DQA mode. We do not
-        * need to share Tx queues between stations because we only implement
-        * client mode; the firmware's station table contains only one entry
-        * which represents our access point.
-        *
-        * Tx aggregation will require additional queues (one queue per TID
-        * for which aggregation is enabled) but we do not implement this yet.
-        */
-       ring = &sc->txq[ac + sc->first_data_qid];
+       qid = sc->first_data_qid;
+
+       /* Put QoS frames on the data queue which maps to their TID. */
+       if (ieee80211_has_qos(wh)) {
+               struct ieee80211_tx_ba *ba;
+               uint16_t qos = ieee80211_get_qos(wh);
+               uint8_t tid = qos & IEEE80211_QOS_TID;
+
+               ba = &ni->ni_tx_ba[tid];
+               if (!IEEE80211_IS_MULTICAST(wh->i_addr1) &&
+                   type == IEEE80211_FC0_TYPE_DATA &&
+                   subtype != IEEE80211_FC0_SUBTYPE_NODATA &&
+                   sc->aggqid[tid] != 0 &&
+                   ba->ba_state == IEEE80211_BA_AGREED) {
+                       qid = sc->aggqid[tid];
+               }
+       }
+
+       ring = &sc->txq[qid];
        desc = &ring->desc[ring->cur];
        memset(desc, 0, sizeof(*desc));
        data = &ring->data[ring->cur];
@@ -5263,18 +5500,167 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ie
 }
 
 int
-iwx_flush_tx_path(struct iwx_softc *sc)
+iwx_flush_sta_tids(struct iwx_softc *sc, int sta_id, uint16_t tids)
 {
+       struct iwx_rx_packet *pkt;
+       struct iwx_tx_path_flush_cmd_rsp *resp;
        struct iwx_tx_path_flush_cmd flush_cmd = {
-               .sta_id = htole32(IWX_STATION_ID),
-               .tid_mask = htole16(0xffff),
+               .sta_id = htole32(sta_id),
+               .tid_mask = htole16(tids),
        };
+       struct iwx_host_cmd hcmd = {
+               .id = IWX_TXPATH_FLUSH,
+               .len = { sizeof(flush_cmd), },
+               .data = { &flush_cmd, },
+               .flags = IWX_CMD_WANT_RESP,
+               .resp_pkt_len = sizeof(*pkt) + sizeof(*resp),
+       };
+       int err, resp_len, i, num_flushed_queues;
+
+       err = iwx_send_cmd(sc, &hcmd);
+       if (err)
+               return err;
+
+       pkt = hcmd.resp_pkt;
+       if (!pkt || (pkt->hdr.flags & IWX_CMD_FAILED_MSK)) {
+               err = EIO;
+               goto out;
+       }
+
+       resp_len = iwx_rx_packet_payload_len(pkt);
+       /* Some firmware versions don't provide a response. */
+       if (resp_len == 0)
+               goto out;
+       else if (resp_len != sizeof(*resp)) {
+               err = EIO;
+               goto out;
+       }
+
+       resp = (void *)pkt->data;
+
+       if (le16toh(resp->sta_id) != sta_id) {
+               err = EIO;
+               goto out;
+       }
+
+       num_flushed_queues = le16toh(resp->num_flushed_queues);
+       if (num_flushed_queues > IWX_TX_FLUSH_QUEUE_RSP) {
+               err = EIO;
+               goto out;
+       }
+
+       for (i = 0; i < num_flushed_queues; i++) {
+               struct iwx_flush_queue_info *queue_info = &resp->queues[i];
+               uint16_t tid = le16toh(queue_info->tid);
+               uint16_t read_after = le16toh(queue_info->read_after_flush);
+               uint16_t qid = le16toh(queue_info->queue_num);
+               struct iwx_tx_ring *txq;
+
+               if (qid >= nitems(sc->txq))
+                       continue;
+
+               txq = &sc->txq[qid];
+               if (tid != txq->tid)
+                       continue;
+
+               iwx_txq_advance(sc, txq, read_after);
+       }
+out:
+       iwx_free_resp(sc, &hcmd);
+       return err;
+}
+
+#define IWX_FLUSH_WAIT_MS      2000
+
+int
+iwx_wait_tx_queues_empty(struct iwx_softc *sc)
+{
+       int i, err;
+
+       for (i = 0; i < nitems(sc->txq); i++) {
+               struct iwx_tx_ring *ring = &sc->txq[i];
+
+               if (i == IWX_DQA_CMD_QUEUE)
+                       continue;
+
+               while (ring->queued > 0) {
+                       err = tsleep_nsec(ring, 0, "iwxflush",
+                           MSEC_TO_NSEC(IWX_FLUSH_WAIT_MS));
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
+}
+
+int
+iwx_drain_sta(struct iwx_softc *sc, struct iwx_node* in, int drain)
+{
+       struct iwx_add_sta_cmd cmd;
        int err;
+       uint32_t status;
 
-       err = iwx_send_cmd_pdu(sc, IWX_TXPATH_FLUSH, 0,
-           sizeof(flush_cmd), &flush_cmd);
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.mac_id_n_color = htole32(IWX_FW_CMD_ID_AND_COLOR(in->in_id,
+           in->in_color));
+       cmd.sta_id = IWX_STATION_ID;
+       cmd.add_modify = IWX_STA_MODE_MODIFY;
+       cmd.station_flags = drain ? htole32(IWX_STA_FLG_DRAIN_FLOW) : 0;
+       cmd.station_flags_msk = htole32(IWX_STA_FLG_DRAIN_FLOW);
+
+       status = IWX_ADD_STA_SUCCESS;
+       err = iwx_send_cmd_pdu_status(sc, IWX_ADD_STA,
+           sizeof(cmd), &cmd, &status);
+       if (err) {
+               printf("%s: could not update sta (error %d)\n",
+                   DEVNAME(sc), err);
+               return err;
+       }
+
+       switch (status & IWX_ADD_STA_STATUS_MASK) {
+       case IWX_ADD_STA_SUCCESS:
+               break;
+       default:
+               err = EIO;
+               printf("%s: Couldn't %s draining for station\n",
+                   DEVNAME(sc), drain ? "enable" : "disable");
+               break;
+       }
+
+       return err;
+}
+
+int
+iwx_flush_sta(struct iwx_softc *sc, struct iwx_node *in)
+{
+       int err;
+
+       splassert(IPL_NET);
+
+       sc->sc_flags |= IWX_FLAG_TXFLUSH;
+
+       err = iwx_drain_sta(sc, in, 1);
        if (err)
-                printf("%s: Flushing tx queue failed: %d\n", DEVNAME(sc), err);
+               goto done;
+
+       err = iwx_flush_sta_tids(sc, IWX_STATION_ID, 0xffff);
+       if (err) {
+               printf("%s: could not flush Tx path (error %d)\n",
+                   DEVNAME(sc), err);
+               goto done;
+       }
+
+       err = iwx_wait_tx_queues_empty(sc);
+       if (err) {
+               printf("%s: Could not empty Tx queues (error %d)\n",
+                   DEVNAME(sc), err);
+               goto done;
+       }
+
+       err = iwx_drain_sta(sc, in, 0);
+done:
+       sc->sc_flags &= ~IWX_FLAG_TXFLUSH;
        return err;
 }
 
@@ -5441,9 +5827,6 @@ iwx_add_sta_cmd(struct iwx_softc *sc, struct iwx_node 
        add_sta_cmd.add_modify = update ? 1 : 0;
        add_sta_cmd.station_flags_msk
            |= htole32(IWX_STA_FLG_FAT_EN_MSK | IWX_STA_FLG_MIMO_EN_MSK);
-       add_sta_cmd.tid_disable_tx = htole16(0xffff);
-       if (update)
-               add_sta_cmd.modify_mask |= (IWX_STA_MODIFY_TID_DISABLE_TX);
 
        if (in->in_ni.ni_flags & IEEE80211_NODE_HT) {
                add_sta_cmd.station_flags_msk
@@ -5515,7 +5898,6 @@ iwx_add_aux_sta(struct iwx_softc *sc)
        cmd.station_type = IWX_STA_AUX_ACTIVITY;
        cmd.mac_id_n_color =
            htole32(IWX_FW_CMD_ID_AND_COLOR(IWX_MAC_INDEX_AUX, 0));
-       cmd.tid_disable_tx = htole16(0xffff);
 
        status = IWX_ADD_STA_SUCCESS;
        err = iwx_send_cmd_pdu_status(sc, IWX_ADD_STA, sizeof(cmd), &cmd,
@@ -5549,6 +5931,46 @@ iwx_rm_sta_cmd(struct iwx_softc *sc, struct iwx_node *
        return err;
 }
 
+int
+iwx_rm_sta(struct iwx_softc *sc, struct iwx_node *in)
+{
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ieee80211_node *ni = &in->in_ni;
+       int err, i;
+
+       err = iwx_flush_sta(sc, in);
+       if (err) {
+               printf("%s: could not flush Tx path (error %d)\n",
+                   DEVNAME(sc), err);
+               return err;
+       }
+       err = iwx_rm_sta_cmd(sc, in);
+       if (err) {
+               printf("%s: could not remove STA (error %d)\n",
+                   DEVNAME(sc), err);
+               return err;
+       }
+
+       in->in_flags = 0;
+
+       sc->sc_rx_ba_sessions = 0;
+       sc->ba_rx.start_tidmask = 0;
+       sc->ba_rx.stop_tidmask = 0;
+       memset(sc->aggqid, 0, sizeof(sc->aggqid));
+       sc->ba_tx.start_tidmask = 0;
+       sc->ba_tx.stop_tidmask = 0;
+       for (i = IWX_FIRST_AGG_TX_QUEUE; i < IWX_LAST_AGG_TX_QUEUE; i++)
+               sc->qenablemsk &= ~(1 << i);
+       for (i = 0; i < IEEE80211_NUM_TID; i++) {
+               struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[i];
+               if (ba->ba_state != IEEE80211_BA_AGREED)
+                       continue;
+               ieee80211_delba_request(ic, ni, 0, 1, i);
+       }
+
+       return 0;
+}
+
 uint8_t
 iwx_umac_scan_fill_channels(struct iwx_softc *sc,
     struct iwx_scan_channel_cfg_umac *chan, size_t chan_nitems,
@@ -6693,9 +7115,9 @@ iwx_scan_abort(struct iwx_softc *sc)
 }
 
 int
-iwx_enable_data_tx_queues(struct iwx_softc *sc)
+iwx_enable_mgmt_queue(struct iwx_softc *sc)
 {
-       int err, ac, cmdver;
+       int err, cmdver;
 
        /*
         * ADD_STA command version >= 12 implies that firmware uses
@@ -6709,19 +7131,16 @@ iwx_enable_data_tx_queues(struct iwx_softc *sc)
        else
                sc->first_data_qid = IWX_DQA_AUX_QUEUE + 1;
 
-       for (ac = 0; ac < EDCA_NUM_AC; ac++) {
-               int qid = ac + sc->first_data_qid;
-               /*
-                * Regular data frames use the "MGMT" TID and queue.
-                * Other TIDs and queues are reserved for frame aggregation.
-                */
-               err = iwx_enable_txq(sc, IWX_STATION_ID, qid, IWX_MGMT_TID,
-                   IWX_TX_RING_COUNT);
-               if (err) {
-                       printf("%s: could not enable Tx queue %d (error %d)\n",
-                           DEVNAME(sc), qid, err);
-                       return err;
-               }
+       /*
+        * Non-QoS frames use the "MGMT" TID and queue.
+        * Other TIDs and data queues are reserved for QoS data frames.
+        */
+       err = iwx_enable_txq(sc, IWX_STATION_ID, sc->first_data_qid,
+           IWX_MGMT_TID, IWX_TX_RING_COUNT);
+       if (err) {
+               printf("%s: could not enable Tx queue %d (error %d)\n",
+                   DEVNAME(sc), sc->first_data_qid, err);
+               return err;
        }
 
        return 0;
@@ -6799,7 +7218,7 @@ iwx_rs_init(struct iwx_softc *sc, struct iwx_node *in)
        cfg_cmd.sta_id = IWX_STATION_ID;
        cfg_cmd.max_ch_width = IWX_RATE_MCS_CHAN_WIDTH_20;
        cfg_cmd.chains = IWX_TLC_MNG_CHAIN_A_MSK | IWX_TLC_MNG_CHAIN_B_MSK;
-       cfg_cmd.max_mpdu_len = IEEE80211_MAX_LEN;
+       cfg_cmd.max_mpdu_len = 3839;
        if (ieee80211_node_supports_ht_sgi20(ni))
                cfg_cmd.sgi_ch_width_supp = (1 << IWX_TLC_MNG_CH_WIDTH_20MHZ);
 
@@ -6944,7 +7363,7 @@ iwx_auth(struct iwx_softc *sc)
                return 0;
        }
 
-       err = iwx_enable_data_tx_queues(sc);
+       err = iwx_enable_mgmt_queue(sc);
        if (err)
                goto rm_sta;
 
@@ -6997,21 +7416,10 @@ iwx_deauth(struct iwx_softc *sc)
        iwx_unprotect_session(sc, in);
 
        if (sc->sc_flags & IWX_FLAG_STA_ACTIVE) {
-               err = iwx_flush_tx_path(sc);
-               if (err) {
-                       printf("%s: could not flush Tx path (error %d)\n",
-                           DEVNAME(sc), err);
+               err = iwx_rm_sta(sc, in);
+               if (err)
                        return err;
-               }
-               err = iwx_rm_sta_cmd(sc, in);
-               if (err) {
-                       printf("%s: could not remove STA (error %d)\n",
-                           DEVNAME(sc), err);
-                       return err;
-               }
                sc->sc_flags &= ~IWX_FLAG_STA_ACTIVE;
-               sc->sc_rx_ba_sessions = 0;
-               in->in_flags = 0;
        }
 
        if (sc->sc_flags & IWX_FLAG_BINDING_ACTIVE) {
@@ -7061,7 +7469,7 @@ iwx_assoc(struct iwx_softc *sc)
        }
 
        if (!update_sta)
-               err = iwx_enable_data_tx_queues(sc);
+               err = iwx_enable_mgmt_queue(sc);
 
        return err;
 }
@@ -7076,19 +7484,10 @@ iwx_disassoc(struct iwx_softc *sc)
        splassert(IPL_NET);
 
        if (sc->sc_flags & IWX_FLAG_STA_ACTIVE) {
-               err = iwx_rm_sta_cmd(sc, in);
-               if (err) {
-                       printf("%s: could not remove STA (error %d)\n",
-                           DEVNAME(sc), err);
+               err = iwx_rm_sta(sc, in);
+               if (err)
                        return err;
-               }
                sc->sc_flags &= ~IWX_FLAG_STA_ACTIVE;
-               in->in_flags = 0;
-               sc->sc_rx_ba_sessions = 0;
-               sc->ba_start_tidmask = 0;
-               sc->ba_stop_tidmask = 0;
-               sc->ba_start_tidmask = 0;
-               sc->ba_stop_tidmask = 0;
        }
 
        return 0;
@@ -7204,6 +7603,15 @@ iwx_run_stop(struct iwx_softc *sc)
 
        splassert(IPL_NET);
 
+       if (sc->sc_flags & IWX_FLAG_STA_ACTIVE) {
+               err = iwx_flush_sta(sc, in);
+               if (err) {
+                       printf("%s: could not flush Tx path (error %d)\n",
+                           DEVNAME(sc), err);
+                       return err;
+               }
+       }
+
        err = iwx_sf_config(sc, IWX_SF_INIT_OFF);
        if (err)
                return err;
@@ -8044,7 +8452,6 @@ iwx_start(struct ifnet *ifp)
        struct ieee80211_node *ni;
        struct ether_header *eh;
        struct mbuf *m;
-       int ac = EDCA_AC_BE; /* XXX */
 
        if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
                return;
@@ -8056,6 +8463,10 @@ iwx_start(struct ifnet *ifp)
                        break;
                }
 
+               /* Don't queue additional frames while flushing Tx queues. */
+               if (sc->sc_flags & IWX_FLAG_TXFLUSH)
+                       break;
+
                /* need to send management frames even if we're not RUNning */
                m = mq_dequeue(&ic->ic_mgtq);
                if (m) {
@@ -8089,7 +8500,7 @@ iwx_start(struct ifnet *ifp)
                if (ic->ic_rawbpf != NULL)
                        bpf_mtap(ic->ic_rawbpf, m, BPF_DIRECTION_OUT);
 #endif
-               if (iwx_tx(sc, m, ni, ac) != 0) {
+               if (iwx_tx(sc, m, ni) != 0) {
                        ieee80211_release_node(ic, ni);
                        ifp->if_oerrors++;
                        continue;
@@ -8150,13 +8561,14 @@ iwx_stop(struct ifnet *ifp)
        sc->sc_flags &= ~IWX_FLAG_TE_ACTIVE;
        sc->sc_flags &= ~IWX_FLAG_HW_ERR;
        sc->sc_flags &= ~IWX_FLAG_SHUTDOWN;
+       sc->sc_flags &= ~IWX_FLAG_TXFLUSH;
 
        sc->sc_rx_ba_sessions = 0;
-       sc->ba_start_tidmask = 0;
-       sc->ba_stop_tidmask = 0;
-       memset(sc->ba_ssn, 0, sizeof(sc->ba_ssn));
-       memset(sc->ba_winsize, 0, sizeof(sc->ba_winsize));
-       memset(sc->ba_timeout_val, 0, sizeof(sc->ba_timeout_val));
+       sc->ba_rx.start_tidmask = 0;
+       sc->ba_rx.stop_tidmask = 0;
+       memset(sc->aggqid, 0, sizeof(sc->aggqid));
+       sc->ba_tx.start_tidmask = 0;
+       sc->ba_tx.stop_tidmask = 0;
 
        sc->sc_newstate(ic, IEEE80211_S_INIT, -1);
 
@@ -8504,7 +8916,7 @@ iwx_dump_driver_status(struct iwx_softc *sc)
        int i;
 
        printf("driver status:\n");
-       for (i = 0; i < IWX_MAX_QUEUES; i++) {
+       for (i = 0; i < nitems(sc->txq); i++) {
                struct iwx_tx_ring *ring = &sc->txq[i];
                printf("  tx ring %2d: qid=%-2d cur=%-3d "
                    "queued=%-3d\n",
@@ -8638,6 +9050,10 @@ iwx_rx_pkt(struct iwx_softc *sc, struct iwx_rx_data *d
                        iwx_rx_tx_cmd(sc, pkt, data);
                        break;
 
+               case IWX_BA_NOTIF:
+                       iwx_rx_compressed_ba(sc, pkt, data);
+                       break;
+
                case IWX_MISSED_BEACONS_NOTIFICATION:
                        iwx_rx_bmiss(sc, pkt, data);
                        break;
@@ -9444,6 +9860,8 @@ iwx_attach(struct device *parent, struct device *self,
 
        /* Set device capabilities. */
        ic->ic_caps =
+           IEEE80211_C_QOS | IEEE80211_C_TX_AMPDU | /* A-MPDU */
+           IEEE80211_C_ADDBA_OFFLOAD | /* device sends ADDBA/DELBA frames */
            IEEE80211_C_WEP |           /* WEP */
            IEEE80211_C_RSN |           /* WPA/RSN */
            IEEE80211_C_SCANALL |       /* device scans all channels at once */
@@ -9517,10 +9935,8 @@ iwx_attach(struct device *parent, struct device *self,
        ic->ic_updateedca = iwx_updateedca;
        ic->ic_ampdu_rx_start = iwx_ampdu_rx_start;
        ic->ic_ampdu_rx_stop = iwx_ampdu_rx_stop;
-#ifdef notyet
        ic->ic_ampdu_tx_start = iwx_ampdu_tx_start;
-       ic->ic_ampdu_tx_stop = iwx_ampdu_tx_stop;
-#endif
+       ic->ic_ampdu_tx_stop = NULL;
        /*
         * We cannot read the MAC address without loading the
         * firmware from disk. Postpone until mountroot is done.
blob - 42dfb8332b03a6085572840789bb1cae76959121
blob + 34899e0e47f424d4127d7b76ac92bffd7de9ae69
--- sys/dev/pci/if_iwxreg.h
+++ sys/dev/pci/if_iwxreg.h
@@ -1393,9 +1393,6 @@ struct iwx_gen3_bc_tbl {
        uint16_t tfd_offset[IWX_TFD_QUEUE_BC_SIZE_GEN3];
 } __packed;
 
-/* Maximum number of Tx queues. */
-#define IWX_MAX_QUEUES 31
-
 /**
  * DQA - Dynamic Queue Allocation -introduction
  *
@@ -1410,27 +1407,27 @@ struct iwx_gen3_bc_tbl {
  * some queues that are statically allocated:
  *     TXQ #0 - command queue
  *     TXQ #1 - aux frames
- *     TXQ #2 - P2P device frames
- *     TXQ #3 - P2P GO/SoftAP GCAST/BCAST frames
- *     TXQ #4 - BSS DATA frames queue
- *     TXQ #5-8 - non-QoS data, QoS no-data, and MGMT frames queue pool
- *     TXQ #9 - P2P GO/SoftAP probe responses
- *     TXQ #10-31 - QoS DATA frames queue pool (for Tx aggregation)
  */
 
 /* static DQA Tx queue numbers */
 #define IWX_DQA_CMD_QUEUE              0
 #define IWX_DQA_AUX_QUEUE              1
-#define IWX_DQA_P2P_DEVICE_QUEUE       2
-#define IWX_DQA_INJECT_MONITOR_QUEUE   2
-#define IWX_DQA_GCAST_QUEUE            3
-#define IWX_DQA_BSS_CLIENT_QUEUE       4
-#define IWX_DQA_MIN_MGMT_QUEUE         5
-#define IWX_DQA_MAX_MGMT_QUEUE         8
-#define IWX_DQA_AP_PROBE_RESP_QUEUE    9
-#define IWX_DQA_MIN_DATA_QUEUE         10
-#define IWX_DQA_MAX_DATA_QUEUE         31
 
+#define IWX_DQA_INJECT_MONITOR_QUEUE   2 /* used in monitor mode only */
+#define IWX_DQA_MGMT_QUEUE             1 /* default queue in other modes */
+
+/* Reserve 8 DQA Tx queues for QoS data frames. */
+#define IWX_MAX_TID_COUNT      8
+#define IWX_FIRST_AGG_TX_QUEUE (IWX_DQA_MGMT_QUEUE + 1)
+#define IWX_LAST_AGG_TX_QUEUE  (IWX_FIRST_AGG_TX_QUEUE + IWX_MAX_TID_COUNT - 1)
+
+/**
+ * Max Tx window size is the max number of contiguous TFDs that the scheduler
+ * can keep track of at one time when creating block-ack chains of frames.
+ * Note that "64" matches the number of ack bits in a block-ack packet.
+ */
+#define IWX_FRAME_LIMIT        64
+
 #define IWX_TX_FIFO_BK 0
 #define IWX_TX_FIFO_BE 1
 #define IWX_TX_FIFO_VI 2
@@ -4952,7 +4949,6 @@ struct iwx_tlc_update_notif {
 /*
  * TID for non QoS frames - to be written in tid_tspec
  */
-#define IWX_MAX_TID_COUNT      8
 #define IWX_TID_NON_QOS        0
 
 /*
@@ -5290,35 +5286,96 @@ struct iwx_tx_resp {
 } __packed; /* TX_RSP_API_S_VER_6 */
 
 /**
- * struct iwx_ba_notif - notifies about reception of BA
- * ( IWX_BA_NOTIF = 0xc5 )
- * @sta_addr_lo32: lower 32 bits of the MAC address
- * @sta_addr_hi16: upper 16 bits of the MAC address
+ * struct iwx_compressed_ba_tfd - progress of a TFD queue
+ * @q_num: TFD queue number
+ * @tfd_index: Index of first un-acked frame in the TFD queue
+ * @scd_queue: For debug only - the physical queue the TFD queue is bound to
+ * @tid: TID of the queue (0-7)
+ * @reserved: reserved for alignment
+ */
+struct iwx_compressed_ba_tfd {
+       uint16_t q_num;
+       uint16_t tfd_index;
+       uint8_t scd_queue;
+       uint8_t tid;
+       uint8_t reserved[2];
+} __packed; /* COMPRESSED_BA_TFD_API_S_VER_1 */
+
+/**
+ * struct iwx_compressed_ba_ratid - progress of a RA TID queue
+ * @q_num: RA TID queue number
+ * @tid: TID of the queue
+ * @ssn: BA window current SSN
+ */
+struct iwx_compressed_ba_ratid {
+       uint8_t q_num;
+       uint8_t tid;
+       uint16_t ssn;
+} __packed; /* COMPRESSED_BA_RATID_API_S_VER_1 */
+
+/*
+ * enum iwx_ba_resp_flags - TX aggregation status
+ * @IWX_MVM_BA_RESP_TX_AGG: generated due to BA
+ * @IWX_MVM_BA_RESP_TX_BAR: generated due to BA after BAR
+ * @IWX_MVM_BA_RESP_TX_AGG_FAIL: aggregation didn't receive BA
+ * @IWX_MVM_BA_RESP_TX_UNDERRUN: aggregation got underrun
+ * @IWX_MVM_BA_RESP_TX_BT_KILL: aggregation got BT-kill
+ * @IWX_MVM_BA_RESP_TX_DSP_TIMEOUT: aggregation didn't finish within the
+ *     expected time
+ */
+enum iwx_ba_resp_flags {
+       IWX_MVM_BA_RESP_TX_AGG,
+       IWX_MVM_BA_RESP_TX_BAR,
+       IWX_MVM_BA_RESP_TX_AGG_FAIL,
+       IWX_MVM_BA_RESP_TX_UNDERRUN,
+       IWX_MVM_BA_RESP_TX_BT_KILL,
+       IWX_MVM_BA_RESP_TX_DSP_TIMEOUT
+};
+
+/**
+ * struct iwx_compressed_ba_notif - notifies about reception of BA
+ * ( IWX_BA_NOTIF = 0xc5 )
+ * @flags: status flag, see the &iwx_ba_resp_flags
  * @sta_id: Index of recipient (BA-sending) station in fw's station table
- * @tid: tid of the session
- * @seq_ctl:
- * @bitmap: the bitmap of the BA notification as seen in the air
- * @scd_flow: the tx queue this BA relates to
- * @scd_ssn: the index of the last contiguously sent packet
- * @txed: number of Txed frames in this batch
- * @txed_2_done: number of Acked frames in this batch
+ * @reduced_txp: power reduced according to TPC. This is the actual value and
+ *     not a copy from the LQ command. Thus, if not the first rate was used
+ *     for Tx-ing then this value will be set to 0 by FW.
+ * @tlc_rate_info: TLC rate info, initial rate index, TLC table color
+ * @retry_cnt: retry count
+ * @query_byte_cnt: SCD query byte count
+ * @query_frame_cnt: SCD query frame count
+ * @txed: number of frames sent in the aggregation (all-TIDs)
+ * @done: number of frames that were Acked by the BA (all-TIDs)
+ * @reserved: reserved (for alignment)
+ * @wireless_time: Wireless-media time
+ * @tx_rate: the rate the aggregation was sent at
+ * @tfd_cnt: number of TFD-Q elements
+ * @ra_tid_cnt: number of RATID-Q elements
+ * @tfd: array of TFD queue status updates. See &iwx_compressed_ba_tfd
+ *     for details. Length in @tfd_cnt.
+ * @ra_tid: array of RA-TID queue status updates. For debug purposes only. See
+ *     &iwx_compressed_ba_ratid for more details. Length in @ra_tid_cnt.
  */
-struct iwx_ba_notif {
-       uint32_t sta_addr_lo32;
-       uint16_t sta_addr_hi16;
-       uint16_t reserved;
-
+struct iwx_compressed_ba_notif {
+       uint32_t flags;
        uint8_t sta_id;
-       uint8_t tid;
-       uint16_t seq_ctl;
-       uint64_t bitmap;
-       uint16_t scd_flow;
-       uint16_t scd_ssn;
-       uint8_t txed;
-       uint8_t txed_2_done;
-       uint16_t reserved1;
-} __packed;
+       uint8_t reduced_txp;
+       uint8_t tlc_rate_info;
+       uint8_t retry_cnt;
+       uint32_t query_byte_cnt;
+       uint16_t query_frame_cnt;
+       uint16_t txed;
+       uint16_t done;
+       uint16_t reserved;
+       uint32_t wireless_time;
+       uint32_t tx_rate;
+       uint16_t tfd_cnt;
+       uint16_t ra_tid_cnt;
+       struct iwx_compressed_ba_ratid ra_tid[0];
+       struct iwx_compressed_ba_tfd tfd[];
+} __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */
 
+
 struct iwx_beacon_notif {
        struct iwx_tx_resp_v3 beacon_notify_hdr;
        uint64_t tsf;
@@ -5356,7 +5413,34 @@ struct iwx_tx_path_flush_cmd {
        uint16_t reserved;
 } __packed; /* TX_PATH_FLUSH_CMD_API_S_VER_2 */
 
+#define IWX_TX_FLUSH_QUEUE_RSP 16
+
 /**
+ * struct iwx_flush_queue_info - virtual flush queue info
+ * @queue_num: virtual queue id
+ * @read_before_flush: read pointer before flush
+ * @read_after_flush: read pointer after flush
+ */
+struct iwx_flush_queue_info {
+       uint16_t tid;
+       uint16_t queue_num;
+       uint16_t read_before_flush;
+       uint16_t read_after_flush;
+} __packed; /* TFDQ_FLUSH_INFO_API_S_VER_1 */
+
+/**
+ * struct iwx_tx_path_flush_cmd_rsp -- queue/FIFO flush command response
+ * @num_flushed_queues: number of queues in queues array
+ * @queues: all flushed queues
+ */
+struct iwx_tx_path_flush_cmd_rsp {
+       uint16_t sta_id;
+       uint16_t num_flushed_queues;
+       struct iwx_flush_queue_info queues[IWX_TX_FLUSH_QUEUE_RSP];
+} __packed; /* TX_PATH_FLUSH_CMD_RSP_API_S_VER_1 */
+
+
+/**
  * iwx_get_scd_ssn - returns the SSN of the SCD
  * @tx_resp: the Tx response from the fw (agg or non-agg)
  *
blob - 3eb5974fc21e11962cdbbb08cd66c1451e0acd30
blob + be876dc0b0768a84d9da2ee144efe2b6f3701d06
--- sys/dev/pci/if_iwxvar.h
+++ sys/dev/pci/if_iwxvar.h
@@ -224,9 +224,6 @@ struct iwx_dma_info {
 #define IWX_TX_RING_LOMARK     192
 #define IWX_TX_RING_HIMARK     224
 
-/* For aggregation queues, index must be aligned to frame sequence number. */
-#define IWX_AGG_SSN_TO_TXQ_IDX(x)      ((x) & (IWX_TX_RING_COUNT - 1))
-
 struct iwx_tx_data {
        bus_dmamap_t    map;
        bus_addr_t      cmd_paddr;
@@ -247,6 +244,7 @@ struct iwx_tx_ring {
        int                     queued;
        int                     cur;
        int                     tail;
+       int                     tid;
 };
 
 #define IWX_RX_MQ_RING_COUNT   512
@@ -279,6 +277,7 @@ struct iwx_rx_ring {
 #define IWX_FLAG_HW_ERR                0x80    /* hardware error occurred */
 #define IWX_FLAG_SHUTDOWN      0x100   /* shutting down; new tasks forbidden */
 #define IWX_FLAG_BGSCAN                0x200   /* background scan in progress */
+#define IWX_FLAG_TXFLUSH       0x400   /* Tx queue flushing in progress */
 
 struct iwx_ucode_status {
        uint32_t uc_lmac_error_event_table[2];
@@ -445,6 +444,11 @@ struct iwx_setkey_task_arg {
        struct ieee80211_key *k;
 };
 
+struct iwx_ba_task_data {
+       uint32_t                start_tidmask;
+       uint32_t                stop_tidmask;
+};
+
 struct iwx_softc {
        struct device sc_dev;
        struct ieee80211com sc_ic;
@@ -459,11 +463,8 @@ struct iwx_softc {
 
        /* Task for firmware BlockAck setup/teardown and its arguments. */
        struct task             ba_task;
-       uint32_t                ba_start_tidmask;
-       uint32_t                ba_stop_tidmask;
-       uint16_t                ba_ssn[IWX_MAX_TID_COUNT];
-       uint16_t                ba_winsize[IWX_MAX_TID_COUNT];
-       int                     ba_timeout_val[IWX_MAX_TID_COUNT];
+       struct iwx_ba_task_data ba_rx;
+       struct iwx_ba_task_data ba_tx;
 
        /* Task for setting encryption keys and its arguments. */
        struct task             setkey_task;
@@ -492,10 +493,12 @@ struct iwx_softc {
        int sc_msix;
 
        /* TX/RX rings. */
-       struct iwx_tx_ring txq[IWX_MAX_QUEUES];
+       struct iwx_tx_ring txq[IWX_LAST_AGG_TX_QUEUE];
        struct iwx_rx_ring rxq;
        int qfullmsk;
+       int qenablemsk;
        int first_data_qid;
+       int aggqid[IEEE80211_NUM_TID];
 
        int sc_sf_state;
 
blob - bf7abf058b86c4b0ad402a22c2e3f3c35e81788e
blob + b3b30f1f98f689c6d066cff04b07b938fde0cc68
--- sys/net80211/ieee80211_proto.c
+++ sys/net80211/ieee80211_proto.c
@@ -700,6 +700,18 @@ ieee80211_addba_request(struct ieee80211com *ic, struc
                /* immediate BA */
                ba->ba_params |= IEEE80211_ADDBA_BA_POLICY;
 
+       if ((ic->ic_caps & IEEE80211_C_ADDBA_OFFLOAD) &&
+           ic->ic_ampdu_tx_start != NULL) {
+               int err = ic->ic_ampdu_tx_start(ic, ni, tid);
+               if (err && err != EBUSY) {
+                       /* driver failed to setup, rollback */
+                       ieee80211_addba_resp_refuse(ic, ni, tid,
+                           IEEE80211_STATUS_UNSPECIFIED);
+               } else if (err == 0)
+                       ieee80211_addba_resp_accept(ic, ni, tid);
+               return err; /* The device will send an ADDBA frame. */
+       }
+
        timeout_add_sec(&ba->ba_to, 1); /* dot11ADDBAResponseTimeout */
        IEEE80211_SEND_ACTION(ic, ni, IEEE80211_CATEG_BA,
            IEEE80211_ACTION_ADDBA_REQ, tid);
@@ -948,6 +960,13 @@ ieee80211_stop_ampdu_tx(struct ieee80211com *ic, struc
                struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
                if (ba->ba_state != IEEE80211_BA_AGREED)
                        continue;
+
+               if (ic->ic_caps & IEEE80211_C_ADDBA_OFFLOAD) {
+                       if (ic->ic_ampdu_tx_stop != NULL)
+                               ic->ic_ampdu_tx_stop(ic, ni, tid);
+                       continue; /* Don't change ba->ba_state! */
+               }
+
                ieee80211_delba_request(ic, ni,
                    mgt == -1 ? 0 : IEEE80211_REASON_AUTH_LEAVE, 1, tid);
        }
blob - 334439887ffe54232e2ccc488b975f1cda5a26a3
blob + 0ca42aaf5b0af1be78d69cc692c02bc9a35b123a
--- sys/net80211/ieee80211_var.h
+++ sys/net80211/ieee80211_var.h
@@ -432,6 +432,7 @@ struct ieee80211_ess {
 #define IEEE80211_C_RAWCTL     0x00004000      /* CAPABILITY: raw ctl */
 #define IEEE80211_C_SCANALLBAND        0x00008000      /* CAPABILITY: scan all bands */
 #define IEEE80211_C_TX_AMPDU   0x00010000      /* CAPABILITY: send A-MPDU */
+#define IEEE80211_C_ADDBA_OFFLOAD 0x00020000   /* CAPABILITY: ADDBA offload */
 
 /* flags for ieee80211_fix_rate() */
 #define        IEEE80211_F_DOSORT      0x00000001      /* sort rate list */

Reply via email to