Running the updated patch; will notify if anything breaks:
iwx0 at pci0 dev 20 function 3 "Intel Wi-Fi 6 AX201" rev 0x20, msix

Previous patch:
Testing download speed................................................................................
Download: 74.76 Mbit/s
Testing upload speed......................................................................................................
Upload: 45.99 Mbit/s

Now:
Testing download speed................................................................................
Download: 85.89 Mbit/s
Testing upload speed......................................................................................................
Upload: 74.45 Mbit/s

HTH,
Eric.


On Sat, Sep 11, 2021 at 12:15 PM Stefan Sperling <s...@stsp.name> wrote:
>
> On Fri, Sep 10, 2021 at 06:49:49PM +0200, Stefan Sperling wrote:
> > Here is another attempt at adding Tx aggregation to iwx(4).
> > This patch is based on the latest state in CVS (if_iwx.c r1.107, which
> > I have committed a minute ago). Sync your tree before applying this patch.
> >
> > Compared to previous iterations of this patch, I have fixed bugs which
> > caused fatal firmware errors and which made traffic stall after roaming.
> >
> > This patch could still make 7.0 release if it gets sufficient test coverage.
> > Please run with this and report any regressions. Thanks!
> >
> > So far, tested by me on AX200 and AX201 against a Pepwave 11ac AP.
> > I have so far not seen any fatal firmware errors, and roaming between 2GHz
> > and 5GHz channels offered by the same AP seems to work reliably.
> > Throughput goes up to 100 Mbit/s max.
>
> The previous version had a problem where it did not take frames
> off the Tx ring when they were done. It is possible that this
> could lead to memory corruption (seen by mlarkin).
>
> Please run this updated patch instead.
>
> And please enable 'ifconfig iwx0 debug' while testing this patch.
> Problem reports will be a lot more useful with debug enabled :)
>
> diff refs/heads/iwx-resume2 refs/heads/iwx-txagg
> blob - 4cfc91b7f4819a1a9f50fdaac339a78f67d1ab5a
> blob + 9e31a8d0bb5c9ba1fad3614fe6dcb5ebdcd33403
> --- sys/dev/pci/if_iwx.c
> +++ sys/dev/pci/if_iwx.c
> @@ -318,18 +318,16 @@ int       iwx_ampdu_rx_start(struct ieee80211com *, struct i
>             uint8_t);
>  void   iwx_ampdu_rx_stop(struct ieee80211com *, struct ieee80211_node *,
>             uint8_t);
> +int    iwx_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
> +           uint8_t);
>  void   iwx_rx_ba_session_expired(void *);
>  void   iwx_rx_bar_frame_release(struct iwx_softc *, struct iwx_rx_packet *,
>             struct iwx_rx_data *, struct mbuf_list *);
>  void   iwx_reorder_timer_expired(void *);
>  void   iwx_sta_rx_agg(struct iwx_softc *, struct ieee80211_node *, uint8_t,
>             uint16_t, uint16_t, int, int);
> -#ifdef notyet
> -int    iwx_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
> +void   iwx_sta_tx_agg_start(struct iwx_softc *, struct ieee80211_node *,
>             uint8_t);
> -void   iwx_ampdu_tx_stop(struct ieee80211com *, struct ieee80211_node *,
> -           uint8_t);
> -#endif
>  void   iwx_ba_task(void *);
>
>  int    iwx_set_mac_addr_from_csr(struct iwx_softc *, struct iwx_nvm_data *);
> @@ -355,10 +353,13 @@ int       iwx_ccmp_decap(struct iwx_softc *, struct mbuf *,
>             struct ieee80211_node *, struct ieee80211_rxinfo *);
>  void   iwx_rx_frame(struct iwx_softc *, struct mbuf *, int, uint32_t, int, int,
>             uint32_t, struct ieee80211_rxinfo *, struct mbuf_list *);
> -void   iwx_rx_tx_cmd_single(struct iwx_softc *, struct iwx_rx_packet *,
> -           struct iwx_node *);
> +void   iwx_clear_tx_desc(struct iwx_softc *, struct iwx_tx_ring *, int);
> +void   iwx_txd_done(struct iwx_softc *, struct iwx_tx_data *);
> +void   iwx_tx_ba_move_window(struct ieee80211com *, int, struct mbuf *);
> +void   iwx_txq_advance(struct iwx_softc *, struct iwx_tx_ring *, int);
>  void   iwx_rx_tx_cmd(struct iwx_softc *, struct iwx_rx_packet *,
>             struct iwx_rx_data *);
> +void   iwx_clear_oactive(struct iwx_softc *, struct iwx_tx_ring *);
>  void   iwx_rx_bmiss(struct iwx_softc *, struct iwx_rx_packet *,
>             struct iwx_rx_data *);
>  int    iwx_binding_cmd(struct iwx_softc *, struct iwx_node *, uint32_t);
> @@ -382,8 +383,11 @@ void       iwx_cmd_done(struct iwx_softc *, int, int, int);
>  const struct iwx_rate *iwx_tx_fill_cmd(struct iwx_softc *, struct iwx_node *,
>             struct ieee80211_frame *, struct iwx_tx_cmd_gen2 *);
>  void   iwx_tx_update_byte_tbl(struct iwx_tx_ring *, int, uint16_t, uint16_t);
> -int    iwx_tx(struct iwx_softc *, struct mbuf *, struct ieee80211_node *, int);
> -int    iwx_flush_tx_path(struct iwx_softc *);
> +int    iwx_tx(struct iwx_softc *, struct mbuf *, struct ieee80211_node *);
> +int    iwx_flush_sta_tids(struct iwx_softc *, int, uint16_t);
> +int    iwx_wait_tx_queues_empty(struct iwx_softc *);
> +int    iwx_drain_sta(struct iwx_softc *sc, struct iwx_node *, int);
> +int    iwx_flush_sta(struct iwx_softc *, struct iwx_node *);
>  int    iwx_beacon_filter_send_cmd(struct iwx_softc *,
>             struct iwx_beacon_filter_cmd *);
>  int    iwx_update_beacon_abort(struct iwx_softc *, struct iwx_node *, int);
> @@ -396,6 +400,7 @@ int iwx_disable_beacon_filter(struct iwx_softc *);
>  int    iwx_add_sta_cmd(struct iwx_softc *, struct iwx_node *, int);
>  int    iwx_add_aux_sta(struct iwx_softc *);
>  int    iwx_rm_sta_cmd(struct iwx_softc *, struct iwx_node *);
> +int    iwx_rm_sta(struct iwx_softc *, struct iwx_node *);
>  int    iwx_fill_probe_req(struct iwx_softc *, struct iwx_scan_probe_req *);
>  int    iwx_config_umac_scan_reduced(struct iwx_softc *);
>  int    iwx_config_umac_scan(struct iwx_softc *);
> @@ -425,6 +430,7 @@ int iwx_scan(struct iwx_softc *);
>  int    iwx_bgscan(struct ieee80211com *);
>  int    iwx_umac_scan_abort(struct iwx_softc *);
>  int    iwx_scan_abort(struct iwx_softc *);
> +int    iwx_enable_mgmt_queue(struct iwx_softc *);
>  int    iwx_rs_rval2idx(uint8_t);
>  uint16_t iwx_rs_ht_rates(struct iwx_softc *, struct ieee80211_node *, int);
>  int    iwx_rs_init(struct iwx_softc *, struct iwx_node *);
> @@ -1776,20 +1782,20 @@ iwx_alloc_tx_ring(struct iwx_softc *sc, struct iwx_tx_
>         ring->desc = ring->desc_dma.vaddr;
>
>         /*
> -        * There is no need to allocate DMA buffers for unused rings.
> -        * The hardware supports up to 31 Tx rings which is more
> +        * The hardware supports up to 512 Tx rings which is more
>          * than we currently need.
>          *
> -        * In DQA mode we use 1 command queue + 4 DQA mgmt/data queues.
> -        * The command is queue 0 (sc->txq[0]), and 4 mgmt/data frame queues
> -        * are sc->tqx[ac + IWX_DQA_AUX_QUEUE + 1], i.e. sc->txq[2:5],
> -        * in order to provide one queue per EDCA category.
> +        * In DQA mode we use 1 command queue + 1 default queue for
> +        * management, control, and non-QoS data frames.
> +        * The command is queue sc->txq[0], our default queue is sc->txq[1].
>          *
> -        * Tx aggregation will require additional queues (one queue per TID
> -        * for which aggregation is enabled) but we do not implement this yet.
> +        * Tx aggregation requires additional queues, one queue per TID for
> +        * which aggregation is enabled. We map TID 0-7 to sc->txq[2:9].
> +        * Firmware may assign its own internal IDs for these queues
> +        * depending on which TID gets aggregation enabled first.
> +        * The driver maintains a table mapping driver-side queue IDs
> +        * to firmware-side queue IDs.
>          */
> -       if (qid > IWX_DQA_MIN_MGMT_QUEUE)
> -               return 0;
>
>         err = iwx_dma_contig_alloc(sc->sc_dmat, &ring->bc_tbl,
>             sizeof(struct iwx_agn_scd_bc_tbl), 0);
> @@ -1863,9 +1869,17 @@ iwx_reset_tx_ring(struct iwx_softc *sc, struct iwx_tx_
>         bus_dmamap_sync(sc->sc_dmat, ring->desc_dma.map, 0,
>             ring->desc_dma.size, BUS_DMASYNC_PREWRITE);
>         sc->qfullmsk &= ~(1 << ring->qid);
> +       sc->qenablemsk &= ~(1 << ring->qid);
> +       for (i = 0; i < nitems(sc->aggqid); i++) {
> +               if (sc->aggqid[i] == ring->qid) {
> +                       sc->aggqid[i] = 0;
> +                       break;
> +               }
> +       }
>         ring->queued = 0;
>         ring->cur = 0;
>         ring->tail = 0;
> +       ring->tid = 0;
>  }
>
>  void
> @@ -2372,15 +2386,23 @@ iwx_start_hw(struct iwx_softc *sc)
>  void
>  iwx_stop_device(struct iwx_softc *sc)
>  {
> -       int qid;
> +       struct ieee80211com *ic = &sc->sc_ic;
> +       struct ieee80211_node *ni = ic->ic_bss;
> +       int i;
>
>         iwx_disable_interrupts(sc);
>         sc->sc_flags &= ~IWX_FLAG_USE_ICT;
>
>         iwx_disable_rx_dma(sc);
>         iwx_reset_rx_ring(sc, &sc->rxq);
> -       for (qid = 0; qid < nitems(sc->txq); qid++)
> -               iwx_reset_tx_ring(sc, &sc->txq[qid]);
> +       for (i = 0; i < nitems(sc->txq); i++)
> +               iwx_reset_tx_ring(sc, &sc->txq[i]);
> +       for (i = 0; i < IEEE80211_NUM_TID; i++) {
> +               struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[i];
> +               if (ba->ba_state != IEEE80211_BA_AGREED)
> +                       continue;
> +               ieee80211_delba_request(ic, ni, 0, 1, i);
> +       }
>
>         /* Make sure (redundant) we've released our request to stay awake */
>         IWX_CLRBITS(sc, IWX_CSR_GP_CNTRL,
> @@ -2487,6 +2509,18 @@ iwx_nic_init(struct iwx_softc *sc)
>         return 0;
>  }
>
> +/* Map a TID to an ieee80211_edca_ac category. */
> +const uint8_t iwx_tid_to_ac[IWX_MAX_TID_COUNT] = {
> +       EDCA_AC_BE,
> +       EDCA_AC_BK,
> +       EDCA_AC_BK,
> +       EDCA_AC_BE,
> +       EDCA_AC_VI,
> +       EDCA_AC_VI,
> +       EDCA_AC_VO,
> +       EDCA_AC_VO,
> +};
> +
>  /* Map ieee80211_edca_ac categories to firmware Tx FIFO. */
>  const uint8_t iwx_ac_to_tx_fifo[] = {
>         IWX_GEN2_EDCA_TX_FIFO_BE,
> @@ -2559,6 +2593,9 @@ iwx_enable_txq(struct iwx_softc *sc, int sta_id, int q
>                 err = EIO;
>                 goto out;
>         }
> +
> +       sc->qenablemsk |= (1 << qid);
> +       ring->tid = tid;
>  out:
>         iwx_free_resp(sc, &hcmd);
>         return err;
> @@ -3146,6 +3183,60 @@ iwx_updateedca(struct ieee80211com *ic)
>  }
>
>  void
> +iwx_sta_tx_agg_start(struct iwx_softc *sc, struct ieee80211_node *ni,
> +    uint8_t tid)
> +{
> +       struct ieee80211com *ic = &sc->sc_ic;
> +       struct ieee80211_tx_ba *ba;
> +       int err, qid;
> +       struct iwx_tx_ring *ring;
> +
> +       /* Ensure we can map this TID to an aggregation queue. */
> +       if (tid >= IWX_MAX_TID_COUNT)
> +               return;
> +
> +       ba = &ni->ni_tx_ba[tid];
> +       if (ba->ba_state != IEEE80211_BA_REQUESTED)
> +               return;
> +
> +       qid = sc->aggqid[tid];
> +       if (qid == 0) {
> +               /* Firmware should pick the next unused Tx queue. */
> +               qid = fls(sc->qenablemsk);
> +       }
> +
> +       /*
> +        * Simply enable the queue.
> +        * Firmware handles Tx Ba session setup and teardown.
> +        */
> +       if ((sc->qenablemsk & (1 << qid)) == 0) {
> +               if (!iwx_nic_lock(sc)) {
> +                       ieee80211_addba_resp_refuse(ic, ni, tid,
> +                           IEEE80211_STATUS_UNSPECIFIED);
> +                       return;
> +               }
> +               err = iwx_enable_txq(sc, IWX_STATION_ID, qid, tid,
> +                   IWX_TX_RING_COUNT);
> +               iwx_nic_unlock(sc);
> +               if (err) {
> +                       printf("%s: could not enable Tx queue %d "
> +                           "(error %d)\n", DEVNAME(sc), qid, err);
> +                       ieee80211_addba_resp_refuse(ic, ni, tid,
> +                           IEEE80211_STATUS_UNSPECIFIED);
> +                       return;
> +               }
> +
> +               ba->ba_winstart = 0;
> +       } else
> +               ba->ba_winstart = ni->ni_qos_txseqs[tid];
> +
> +       ring = &sc->txq[qid];
> +       ba->ba_timeout_val = 0;
> +       ieee80211_addba_resp_accept(ic, ni, tid);
> +       sc->aggqid[tid] = qid;
> +}
> +
> +void
>  iwx_ba_task(void *arg)
>  {
>         struct iwx_softc *sc = arg;
> @@ -3157,16 +3248,26 @@ iwx_ba_task(void *arg)
>         for (tid = 0; tid < IWX_MAX_TID_COUNT; tid++) {
>                 if (sc->sc_flags & IWX_FLAG_SHUTDOWN)
>                         break;
> -               if (sc->ba_start_tidmask & (1 << tid)) {
> -                       iwx_sta_rx_agg(sc, ni, tid, sc->ba_ssn[tid],
> -                           sc->ba_winsize[tid], sc->ba_timeout_val[tid], 1);
> -                       sc->ba_start_tidmask &= ~(1 << tid);
> -               } else if (sc->ba_stop_tidmask & (1 << tid)) {
> +               if (sc->ba_rx.start_tidmask & (1 << tid)) {
> +                       struct ieee80211_rx_ba *ba = &ni->ni_rx_ba[tid];
> +                       iwx_sta_rx_agg(sc, ni, tid, ba->ba_winstart,
> +                           ba->ba_winsize, ba->ba_timeout_val, 1);
> +                       sc->ba_rx.start_tidmask &= ~(1 << tid);
> +               } else if (sc->ba_rx.stop_tidmask & (1 << tid)) {
>                         iwx_sta_rx_agg(sc, ni, tid, 0, 0, 0, 0);
> -                       sc->ba_stop_tidmask &= ~(1 << tid);
> +                       sc->ba_rx.stop_tidmask &= ~(1 << tid);
>                 }
>         }
>
> +       for (tid = 0; tid < IWX_MAX_TID_COUNT; tid++) {
> +               if (sc->sc_flags & IWX_FLAG_SHUTDOWN)
> +                       break;
> +               if (sc->ba_tx.start_tidmask & (1 << tid)) {
> +                       iwx_sta_tx_agg_start(sc, ni, tid);
> +                       sc->ba_tx.start_tidmask &= ~(1 << tid);
> +               }
> +       }
> +
>         refcnt_rele_wake(&sc->task_refs);
>         splx(s);
>  }
> @@ -3179,17 +3280,16 @@ int
>  iwx_ampdu_rx_start(struct ieee80211com *ic, struct ieee80211_node *ni,
>      uint8_t tid)
>  {
> -       struct ieee80211_rx_ba *ba = &ni->ni_rx_ba[tid];
>         struct iwx_softc *sc = IC2IFP(ic)->if_softc;
>
>         if (sc->sc_rx_ba_sessions >= IWX_MAX_RX_BA_SESSIONS ||
> -           tid > IWX_MAX_TID_COUNT || (sc->ba_start_tidmask & (1 << tid)))
> +           tid > IWX_MAX_TID_COUNT)
>                 return ENOSPC;
>
> -       sc->ba_start_tidmask |= (1 << tid);
> -       sc->ba_ssn[tid] = ba->ba_winstart;
> -       sc->ba_winsize[tid] = ba->ba_winsize;
> -       sc->ba_timeout_val[tid] = ba->ba_timeout_val;
> +       if (sc->ba_rx.start_tidmask & (1 << tid))
> +               return EBUSY;
> +
> +       sc->ba_rx.start_tidmask |= (1 << tid);
>         iwx_add_task(sc, systq, &sc->ba_task);
>
>         return EBUSY;
> @@ -3205,13 +3305,49 @@ iwx_ampdu_rx_stop(struct ieee80211com *ic, struct ieee
>  {
>         struct iwx_softc *sc = IC2IFP(ic)->if_softc;
>
> -       if (tid > IWX_MAX_TID_COUNT || sc->ba_stop_tidmask & (1 << tid))
> +       if (tid > IWX_MAX_TID_COUNT || sc->ba_rx.stop_tidmask & (1 << tid))
>                 return;
>
> -       sc->ba_stop_tidmask = (1 << tid);
> +       sc->ba_rx.stop_tidmask = (1 << tid);
>         iwx_add_task(sc, systq, &sc->ba_task);
>  }
>
> +int
> +iwx_ampdu_tx_start(struct ieee80211com *ic, struct ieee80211_node *ni,
> +    uint8_t tid)
> +{
> +       struct iwx_softc *sc = IC2IFP(ic)->if_softc;
> +       struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
> +
> +       /*
> +        * Require a firmware version which uses an internal AUX queue.
> +        * The value of IWX_FIRST_AGG_TX_QUEUE would be incorrect otherwise.
> +        */
> +       if (sc->first_data_qid != IWX_DQA_CMD_QUEUE + 1)
> +               return ENOTSUP;
> +
> +       /* Ensure we can map this TID to an aggregation queue. */
> +       if (tid >= IWX_MAX_TID_COUNT)
> +               return EINVAL;
> +
> +       /* We only support a fixed Tx aggregation window size, for now. */
> +       if (ba->ba_winsize != IWX_FRAME_LIMIT)
> +               return ENOTSUP;
> +
> +       /* Is firmware already using an agg queue with this TID? */
> +       if (sc->aggqid[tid] != 0)
> +               return ENOSPC;
> +
> +       /* Are we already processing an ADDBA request? */
> +       if (sc->ba_tx.start_tidmask & (1 << tid))
> +               return EBUSY;
> +
> +       sc->ba_tx.start_tidmask |= (1 << tid);
> +       iwx_add_task(sc, systq, &sc->ba_task);
> +
> +       return EBUSY;
> +}
> +
>  /* Read the mac address from WFMP registers. */
>  int
>  iwx_set_mac_addr_from_csr(struct iwx_softc *sc, struct iwx_nvm_data *data)
> @@ -4368,22 +4504,22 @@ iwx_rx_mpdu_mq(struct iwx_softc *sc, struct mbuf *m, v
>  }
>
>  void
> -iwx_rx_tx_cmd_single(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
> -    struct iwx_node *in)
> +iwx_clear_tx_desc(struct iwx_softc *sc, struct iwx_tx_ring *ring, int idx)
>  {
> -       struct ieee80211com *ic = &sc->sc_ic;
> -       struct ifnet *ifp = IC2IFP(ic);
> -       struct iwx_tx_resp *tx_resp = (void *)pkt->data;
> -       int status = le16toh(tx_resp->status.status) & IWX_TX_STATUS_MSK;
> -       int txfail;
> -
> -       KASSERT(tx_resp->frame_count == 1);
> +       struct iwx_tfh_tfd *desc = &ring->desc[idx];
> +       uint8_t num_tbs = le16toh(desc->num_tbs) & 0x1f;
> +       int i;
>
> -       txfail = (status != IWX_TX_STATUS_SUCCESS &&
> -           status != IWX_TX_STATUS_DIRECT_DONE);
> +       /* First TB is never cleared - it is bidirectional DMA data. */
> +       for (i = 1; i < num_tbs; i++) {
> +               struct iwx_tfh_tb *tb = &desc->tbs[i];
> +               memset(tb, 0, sizeof(*tb));
> +       }
> +       desc->num_tbs = 0;
>
> -       if (txfail)
> -               ifp->if_oerrors++;
> +       bus_dmamap_sync(sc->sc_dmat, ring->desc_dma.map,
> +           (char *)(void *)desc - (char *)(void *)ring->desc_dma.vaddr,
> +           sizeof(*desc), BUS_DMASYNC_PREWRITE);
>  }
>
>  void
> @@ -4403,16 +4539,57 @@ iwx_txd_done(struct iwx_softc *sc, struct iwx_tx_data
>  }
>
>  void
> +iwx_tx_ba_move_window(struct ieee80211com *ic, int tid, struct mbuf *m)
> +{
> +       struct ieee80211_node *ni = ic->ic_bss;
> +       struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
> +       struct ieee80211_frame *wh;
> +       uint16_t seq;
> +
> +       if (ba->ba_state != IEEE80211_BA_AGREED)
> +               return;
> +
> +       wh = mtod(m, struct ieee80211_frame *);
> +       if (!ieee80211_has_seq(wh))
> +               return;
> +
> +       seq = letoh16(*(u_int16_t *)wh->i_seq) >> IEEE80211_SEQ_SEQ_SHIFT;
> +       ba->ba_winstart = seq;
> +}
> +
> +void
> +iwx_txq_advance(struct iwx_softc *sc, struct iwx_tx_ring *ring, int idx)
> +{
> +       struct iwx_tx_data *txd;
> +       struct ieee80211com *ic = &sc->sc_ic;
> +
> +       while (ring->tail != idx) {
> +               txd = &ring->data[ring->tail];
> +               if (txd->m != NULL) {
> +                       iwx_clear_tx_desc(sc, ring, ring->tail);
> +                       iwx_tx_update_byte_tbl(ring, ring->tail, 0, 0);
> +                       iwx_txd_done(sc, txd);
> +                       ring->queued--;
> +               }
> +               ring->tail = (ring->tail + 1) % IWX_TX_RING_COUNT;
> +       }
> +
> +       if (ring->qid >= IWX_FIRST_AGG_TX_QUEUE) {
> +               txd = &ring->data[idx];
> +               if (txd->m != NULL)
> +                       iwx_tx_ba_move_window(ic, ring->tid, txd->m);
> +       }
> +}
> +
> +void
>  iwx_rx_tx_cmd(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
>      struct iwx_rx_data *data)
>  {
>         struct ieee80211com *ic = &sc->sc_ic;
>         struct ifnet *ifp = IC2IFP(ic);
>         struct iwx_cmd_header *cmd_hdr = &pkt->hdr;
> -       int idx = cmd_hdr->idx;
> -       int qid = cmd_hdr->qid;
> +       int qid = cmd_hdr->qid, status, txfail;
>         struct iwx_tx_ring *ring = &sc->txq[qid];
> -       struct iwx_tx_data *txd;
>         struct iwx_tx_resp *tx_resp = (void *)pkt->data;
>         uint32_t ssn;
>         uint32_t len = iwx_rx_packet_len(pkt);
> @@ -4422,32 +4599,43 @@ iwx_rx_tx_cmd(struct iwx_softc *sc, struct iwx_rx_pack
>
>         sc->sc_tx_timer = 0;
>
> -       txd = &ring->data[idx];
> -       if (txd->m == NULL)
> +       /* Sanity checks. */
> +       if (sizeof(*tx_resp) > len)
>                 return;
> -
> -       if (sizeof(*tx_resp) + sizeof(ssn) +
> +       if (qid < IWX_FIRST_AGG_TX_QUEUE && tx_resp->frame_count > 1)
> +               return;
> +       if (qid >= IWX_FIRST_AGG_TX_QUEUE && sizeof(*tx_resp) + sizeof(ssn) +
>             tx_resp->frame_count * sizeof(tx_resp->status) > len)
>                 return;
>
> -       iwx_rx_tx_cmd_single(sc, pkt, txd->in);
> +       if (tx_resp->frame_count > 1) /* A-MPDU */
> +               return;
>
> +       status = le16toh(tx_resp->status.status) & IWX_TX_STATUS_MSK;
> +       txfail = (status != IWX_TX_STATUS_SUCCESS &&
> +           status != IWX_TX_STATUS_DIRECT_DONE);
> +
> +       if (txfail)
> +               ifp->if_oerrors++;
> +
>         /*
> -        * Even though this is not an agg queue, we must only free
> -        * frames before the firmware's starting sequence number.
> +        * On hardware supported by iwx(4) the SSN counter is only
> +        * 8 bit and corresponds to a Tx ring index rather than a
> +        * sequence number. Frames up to this index (non-inclusive)
> +        * can now be freed.
>          */
>         memcpy(&ssn, &tx_resp->status + tx_resp->frame_count, sizeof(ssn));
> -       ssn = le32toh(ssn) & 0xfff;
> -       while (ring->tail != IWX_AGG_SSN_TO_TXQ_IDX(ssn)) {
> -               txd = &ring->data[ring->tail];
> -               if (txd->m != NULL) {
> -                       iwx_txd_done(sc, txd);
> -                       iwx_tx_update_byte_tbl(ring, idx, 0, 0);
> -                       ring->queued--;
> -               }
> -               ring->tail = (ring->tail + 1) % IWX_TX_RING_COUNT;
> -       }
> +       ssn = le32toh(ssn) & 0xff;
> +       iwx_txq_advance(sc, ring, ssn);
> +       iwx_clear_oactive(sc, ring);
> +}
>
> +void
> +iwx_clear_oactive(struct iwx_softc *sc, struct iwx_tx_ring *ring)
> +{
> +       struct ieee80211com *ic = &sc->sc_ic;
> +       struct ifnet *ifp = IC2IFP(ic);
> +
>         if (ring->queued < IWX_TX_RING_LOMARK) {
>                 sc->qfullmsk &= ~(1 << ring->qid);
>                 if (sc->qfullmsk == 0 && ifq_is_oactive(&ifp->if_snd)) {
> @@ -4463,6 +4651,64 @@ iwx_rx_tx_cmd(struct iwx_softc *sc, struct iwx_rx_pack
>  }
>
>  void
> +iwx_rx_compressed_ba(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
> +    struct iwx_rx_data *data)
> +{
> +       struct iwx_compressed_ba_notif *ba_res = (void *)pkt->data;
> +       struct ieee80211com *ic = &sc->sc_ic;
> +       struct ieee80211_node *ni;
> +       struct ieee80211_tx_ba *ba;
> +       struct iwx_node *in;
> +       struct iwx_tx_ring *ring;
> +       uint16_t i, tfd_cnt, ra_tid_cnt, idx;
> +       int qid;
> +
> +       if (ic->ic_state != IEEE80211_S_RUN)
> +               return;
> +
> +       if (iwx_rx_packet_payload_len(pkt) < sizeof(*ba_res))
> +               return;
> +
> +       if (ba_res->sta_id != IWX_STATION_ID)
> +               return;
> +
> +       ni = ic->ic_bss;
> +       in = (void *)ni;
> +
> +       tfd_cnt = le16toh(ba_res->tfd_cnt);
> +       ra_tid_cnt = le16toh(ba_res->ra_tid_cnt);
> +       if (!tfd_cnt || iwx_rx_packet_payload_len(pkt) < (sizeof(*ba_res) +
> +           sizeof(ba_res->ra_tid[0]) * ra_tid_cnt +
> +           sizeof(ba_res->tfd[0]) * tfd_cnt))
> +               return;
> +
> +       for (i = 0; i < tfd_cnt; i++) {
> +               struct iwx_compressed_ba_tfd *ba_tfd = &ba_res->tfd[i];
> +               uint8_t tid;
> +
> +               tid = ba_tfd->tid;
> +               if (tid >= nitems(sc->aggqid))
> +                       continue;
> +
> +               qid = sc->aggqid[tid];
> +               if (qid != htole16(ba_tfd->q_num))
> +                       continue;
> +
> +               ring = &sc->txq[qid];
> +
> +               ba = &ni->ni_tx_ba[tid];
> +               if (ba->ba_state != IEEE80211_BA_AGREED)
> +                       continue;
> +
> +               idx = le16toh(ba_tfd->tfd_index);
> +               if (idx >= IWX_TX_RING_COUNT)
> +                       continue;
> +               iwx_txq_advance(sc, ring, idx);
> +               iwx_clear_oactive(sc, ring);
> +       }
> +}
> +
> +void
>  iwx_rx_bmiss(struct iwx_softc *sc, struct iwx_rx_packet *pkt,
>      struct iwx_rx_data *data)
>  {
> @@ -5058,7 +5304,7 @@ iwx_tx_update_byte_tbl(struct iwx_tx_ring *txq, int id
>  }
>
>  int
> -iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni, int ac)
> +iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
>  {
>         struct ieee80211com *ic = &sc->sc_ic;
>         struct iwx_node *in = (void *)ni;
> @@ -5074,25 +5320,36 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ie
>         u_int hdrlen;
>         bus_dma_segment_t *seg;
>         uint16_t num_tbs;
> -       uint8_t type;
> -       int i, totlen, err, pad;
> +       uint8_t type, subtype;
> +       int i, totlen, err, pad, qid;
>
>         wh = mtod(m, struct ieee80211_frame *);
> -       hdrlen = ieee80211_get_hdrlen(wh);
>         type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
> +       subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
> +       if (type == IEEE80211_FC0_TYPE_CTL)
> +               hdrlen = sizeof(struct ieee80211_frame_min);
> +       else
> +               hdrlen = ieee80211_get_hdrlen(wh);
>
> -       /*
> -        * Map EDCA categories to Tx data queues.
> -        *
> -        * We use static data queue assignments even in DQA mode. We do not
> -        * need to share Tx queues between stations because we only implement
> -        * client mode; the firmware's station table contains only one entry
> -        * which represents our access point.
> -        *
> -        * Tx aggregation will require additional queues (one queue per TID
> -        * for which aggregation is enabled) but we do not implement this yet.
> -        */
> -       ring = &sc->txq[ac + sc->first_data_qid];
> +       qid = sc->first_data_qid;
> +
> +       /* Put QoS frames on the data queue which maps to their TID. */
> +       if (ieee80211_has_qos(wh)) {
> +               struct ieee80211_tx_ba *ba;
> +               uint16_t qos = ieee80211_get_qos(wh);
> +               uint8_t tid = qos & IEEE80211_QOS_TID;
> +
> +               ba = &ni->ni_tx_ba[tid];
> +               if (!IEEE80211_IS_MULTICAST(wh->i_addr1) &&
> +                   type == IEEE80211_FC0_TYPE_DATA &&
> +                   subtype != IEEE80211_FC0_SUBTYPE_NODATA &&
> +                   sc->aggqid[tid] != 0 &&
> +                   ba->ba_state == IEEE80211_BA_AGREED) {
> +                       qid = sc->aggqid[tid];
> +               }
> +       }
> +
> +       ring = &sc->txq[qid];
>         desc = &ring->desc[ring->cur];
>         memset(desc, 0, sizeof(*desc));
>         data = &ring->data[ring->cur];
> @@ -5243,18 +5500,167 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ie
>  }
>
>  int
> -iwx_flush_tx_path(struct iwx_softc *sc)
> +iwx_flush_sta_tids(struct iwx_softc *sc, int sta_id, uint16_t tids)
>  {
> +       struct iwx_rx_packet *pkt;
> +       struct iwx_tx_path_flush_cmd_rsp *resp;
>         struct iwx_tx_path_flush_cmd flush_cmd = {
> -               .sta_id = htole32(IWX_STATION_ID),
> -               .tid_mask = htole16(0xffff),
> +               .sta_id = htole32(sta_id),
> +               .tid_mask = htole16(tids),
>         };
> +       struct iwx_host_cmd hcmd = {
> +               .id = IWX_TXPATH_FLUSH,
> +               .len = { sizeof(flush_cmd), },
> +               .data = { &flush_cmd, },
> +               .flags = IWX_CMD_WANT_RESP,
> +               .resp_pkt_len = sizeof(*pkt) + sizeof(*resp),
> +       };
> +       int err, resp_len, i, num_flushed_queues;
> +
> +       err = iwx_send_cmd(sc, &hcmd);
> +       if (err)
> +               return err;
> +
> +       pkt = hcmd.resp_pkt;
> +       if (!pkt || (pkt->hdr.flags & IWX_CMD_FAILED_MSK)) {
> +               err = EIO;
> +               goto out;
> +       }
> +
> +       resp_len = iwx_rx_packet_payload_len(pkt);
> +       /* Some firmware versions don't provide a response. */
> +       if (resp_len == 0)
> +               goto out;
> +       else if (resp_len != sizeof(*resp)) {
> +               err = EIO;
> +               goto out;
> +       }
> +
> +       resp = (void *)pkt->data;
> +
> +       if (le16toh(resp->sta_id) != sta_id) {
> +               err = EIO;
> +               goto out;
> +       }
> +
> +       num_flushed_queues = le16toh(resp->num_flushed_queues);
> +       if (num_flushed_queues > IWX_TX_FLUSH_QUEUE_RSP) {
> +               err = EIO;
> +               goto out;
> +       }
> +
> +       for (i = 0; i < num_flushed_queues; i++) {
> +               struct iwx_flush_queue_info *queue_info = &resp->queues[i];
> +               uint16_t tid = le16toh(queue_info->tid);
> +               uint16_t read_after = le16toh(queue_info->read_after_flush);
> +               uint16_t qid = le16toh(queue_info->queue_num);
> +               struct iwx_tx_ring *txq;
> +
> +               if (qid >= nitems(sc->txq))
> +                       continue;
> +
> +               txq = &sc->txq[qid];
> +               if (tid != txq->tid)
> +                       continue;
> +
> +               iwx_txq_advance(sc, txq, read_after);
> +       }
> +out:
> +       iwx_free_resp(sc, &hcmd);
> +       return err;
> +}
> +
> +#define IWX_FLUSH_WAIT_MS      2000
> +
> +int
> +iwx_wait_tx_queues_empty(struct iwx_softc *sc)
> +{
> +       int i, err;
> +
> +       for (i = 0; i < nitems(sc->txq); i++) {
> +               struct iwx_tx_ring *ring = &sc->txq[i];
> +
> +               if (i == IWX_DQA_CMD_QUEUE)
> +                       continue;
> +
> +               while (ring->queued > 0) {
> +                       err = tsleep_nsec(ring, 0, "iwxflush",
> +                           MSEC_TO_NSEC(IWX_FLUSH_WAIT_MS));
> +                       if (err)
> +                               return err;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +int
> +iwx_drain_sta(struct iwx_softc *sc, struct iwx_node* in, int drain)
> +{
> +       struct iwx_add_sta_cmd cmd;
>         int err;
> +       uint32_t status;
>
> -       err = iwx_send_cmd_pdu(sc, IWX_TXPATH_FLUSH, 0,
> -           sizeof(flush_cmd), &flush_cmd);
> +       memset(&cmd, 0, sizeof(cmd));
> +       cmd.mac_id_n_color = htole32(IWX_FW_CMD_ID_AND_COLOR(in->in_id,
> +           in->in_color));
> +       cmd.sta_id = IWX_STATION_ID;
> +       cmd.add_modify = IWX_STA_MODE_MODIFY;
> +       cmd.station_flags = drain ? htole32(IWX_STA_FLG_DRAIN_FLOW) : 0;
> +       cmd.station_flags_msk = htole32(IWX_STA_FLG_DRAIN_FLOW);
> +
> +       status = IWX_ADD_STA_SUCCESS;
> +       err = iwx_send_cmd_pdu_status(sc, IWX_ADD_STA,
> +           sizeof(cmd), &cmd, &status);
> +       if (err) {
> +               printf("%s: could not update sta (error %d)\n",
> +                   DEVNAME(sc), err);
> +               return err;
> +       }
> +
> +       switch (status & IWX_ADD_STA_STATUS_MASK) {
> +       case IWX_ADD_STA_SUCCESS:
> +               break;
> +       default:
> +               err = EIO;
> +               printf("%s: Couldn't %s draining for station\n",
> +                   DEVNAME(sc), drain ? "enable" : "disable");
> +               break;
> +       }
> +
> +       return err;
> +}
> +
> +int
> +iwx_flush_sta(struct iwx_softc *sc, struct iwx_node *in)
> +{
> +       int err;
> +
> +       splassert(IPL_NET);
> +
> +       sc->sc_flags |= IWX_FLAG_TXFLUSH;
> +
> +       err = iwx_drain_sta(sc, in, 1);
>         if (err)
> -                printf("%s: Flushing tx queue failed: %d\n", DEVNAME(sc), err);
> +               goto done;
> +
> +       err = iwx_flush_sta_tids(sc, IWX_STATION_ID, 0xffff);
> +       if (err) {
> +               printf("%s: could not flush Tx path (error %d)\n",
> +                   DEVNAME(sc), err);
> +               goto done;
> +       }
> +
> +       err = iwx_wait_tx_queues_empty(sc);
> +       if (err) {
> +               printf("%s: Could not empty Tx queues (error %d)\n",
> +                   DEVNAME(sc), err);
> +               goto done;
> +       }
> +
> +       err = iwx_drain_sta(sc, in, 0);
> +done:
> +       sc->sc_flags &= ~IWX_FLAG_TXFLUSH;
>         return err;
>  }
>
> @@ -5421,9 +5827,6 @@ iwx_add_sta_cmd(struct iwx_softc *sc, struct iwx_node
>         add_sta_cmd.add_modify = update ? 1 : 0;
>         add_sta_cmd.station_flags_msk
>             |= htole32(IWX_STA_FLG_FAT_EN_MSK | IWX_STA_FLG_MIMO_EN_MSK);
> -       add_sta_cmd.tid_disable_tx = htole16(0xffff);
> -       if (update)
> -               add_sta_cmd.modify_mask |= (IWX_STA_MODIFY_TID_DISABLE_TX);
>
>         if (in->in_ni.ni_flags & IEEE80211_NODE_HT) {
>                 add_sta_cmd.station_flags_msk
> @@ -5495,7 +5898,6 @@ iwx_add_aux_sta(struct iwx_softc *sc)
>         cmd.station_type = IWX_STA_AUX_ACTIVITY;
>         cmd.mac_id_n_color =
>             htole32(IWX_FW_CMD_ID_AND_COLOR(IWX_MAC_INDEX_AUX, 0));
> -       cmd.tid_disable_tx = htole16(0xffff);
>
>         status = IWX_ADD_STA_SUCCESS;
>         err = iwx_send_cmd_pdu_status(sc, IWX_ADD_STA, sizeof(cmd), &cmd,
> @@ -5529,6 +5931,46 @@ iwx_rm_sta_cmd(struct iwx_softc *sc, struct iwx_node *
>         return err;
>  }
>
> +int
> +iwx_rm_sta(struct iwx_softc *sc, struct iwx_node *in)
> +{
> +       struct ieee80211com *ic = &sc->sc_ic;
> +       struct ieee80211_node *ni = &in->in_ni;
> +       int err, i;
> +
> +       err = iwx_flush_sta(sc, in);
> +       if (err) {
> +               printf("%s: could not flush Tx path (error %d)\n",
> +                   DEVNAME(sc), err);
> +               return err;
> +       }
> +       err = iwx_rm_sta_cmd(sc, in);
> +       if (err) {
> +               printf("%s: could not remove STA (error %d)\n",
> +                   DEVNAME(sc), err);
> +               return err;
> +       }
> +
> +       in->in_flags = 0;
> +
> +       sc->sc_rx_ba_sessions = 0;
> +       sc->ba_rx.start_tidmask = 0;
> +       sc->ba_rx.stop_tidmask = 0;
> +       memset(sc->aggqid, 0, sizeof(sc->aggqid));
> +       sc->ba_tx.start_tidmask = 0;
> +       sc->ba_tx.stop_tidmask = 0;
> +       for (i = IWX_FIRST_AGG_TX_QUEUE; i < IWX_LAST_AGG_TX_QUEUE; i++)
> +               sc->qenablemsk &= ~(1 << i);
> +       for (i = 0; i < IEEE80211_NUM_TID; i++) {
> +               struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[i];
> +               if (ba->ba_state != IEEE80211_BA_AGREED)
> +                       continue;
> +               ieee80211_delba_request(ic, ni, 0, 1, i);
> +       }
> +
> +       return 0;
> +}
> +
>  uint8_t
>  iwx_umac_scan_fill_channels(struct iwx_softc *sc,
>      struct iwx_scan_channel_cfg_umac *chan, size_t chan_nitems,
> @@ -6673,9 +7115,9 @@ iwx_scan_abort(struct iwx_softc *sc)
>  }
>
>  int
> -iwx_enable_data_tx_queues(struct iwx_softc *sc)
> +iwx_enable_mgmt_queue(struct iwx_softc *sc)
>  {
> -       int err, ac, cmdver;
> +       int err, cmdver;
>
>         /*
>          * ADD_STA command version >= 12 implies that firmware uses
> @@ -6689,19 +7131,16 @@ iwx_enable_data_tx_queues(struct iwx_softc *sc)
>         else
>                 sc->first_data_qid = IWX_DQA_AUX_QUEUE + 1;
>
> -       for (ac = 0; ac < EDCA_NUM_AC; ac++) {
> -               int qid = ac + sc->first_data_qid;
> -               /*
> -                * Regular data frames use the "MGMT" TID and queue.
> -                * Other TIDs and queues are reserved for frame aggregation.
> -                */
> -               err = iwx_enable_txq(sc, IWX_STATION_ID, qid, IWX_MGMT_TID,
> -                   IWX_TX_RING_COUNT);
> -               if (err) {
> -                       printf("%s: could not enable Tx queue %d (error %d)\n",
> -                           DEVNAME(sc), qid, err);
> -                       return err;
> -               }
> +       /*
> +        * Non-QoS frames use the "MGMT" TID and queue.
> +        * Other TIDs and data queues are reserved for QoS data frames.
> +        */
> +       err = iwx_enable_txq(sc, IWX_STATION_ID, sc->first_data_qid,
> +           IWX_MGMT_TID, IWX_TX_RING_COUNT);
> +       if (err) {
> +               printf("%s: could not enable Tx queue %d (error %d)\n",
> +                   DEVNAME(sc), sc->first_data_qid, err);
> +               return err;
>         }
>
>         return 0;
> @@ -6779,7 +7218,7 @@ iwx_rs_init(struct iwx_softc *sc, struct iwx_node *in)
>         cfg_cmd.sta_id = IWX_STATION_ID;
>         cfg_cmd.max_ch_width = IWX_RATE_MCS_CHAN_WIDTH_20;
>         cfg_cmd.chains = IWX_TLC_MNG_CHAIN_A_MSK | IWX_TLC_MNG_CHAIN_B_MSK;
> -       cfg_cmd.max_mpdu_len = IEEE80211_MAX_LEN;
> +       cfg_cmd.max_mpdu_len = 3839;
>         if (ieee80211_node_supports_ht_sgi20(ni))
>                 cfg_cmd.sgi_ch_width_supp = (1 << IWX_TLC_MNG_CH_WIDTH_20MHZ);
>
> @@ -6924,7 +7363,7 @@ iwx_auth(struct iwx_softc *sc)
>                 return 0;
>         }
>
> -       err = iwx_enable_data_tx_queues(sc);
> +       err = iwx_enable_mgmt_queue(sc);
>         if (err)
>                 goto rm_sta;
>
> @@ -6977,21 +7416,10 @@ iwx_deauth(struct iwx_softc *sc)
>         iwx_unprotect_session(sc, in);
>
>         if (sc->sc_flags & IWX_FLAG_STA_ACTIVE) {
> -               err = iwx_flush_tx_path(sc);
> -               if (err) {
> -                       printf("%s: could not flush Tx path (error %d)\n",
> -                           DEVNAME(sc), err);
> +               err = iwx_rm_sta(sc, in);
> +               if (err)
>                         return err;
> -               }
> -               err = iwx_rm_sta_cmd(sc, in);
> -               if (err) {
> -                       printf("%s: could not remove STA (error %d)\n",
> -                           DEVNAME(sc), err);
> -                       return err;
> -               }
>                 sc->sc_flags &= ~IWX_FLAG_STA_ACTIVE;
> -               sc->sc_rx_ba_sessions = 0;
> -               in->in_flags = 0;
>         }
>
>         if (sc->sc_flags & IWX_FLAG_BINDING_ACTIVE) {
> @@ -7041,7 +7469,7 @@ iwx_assoc(struct iwx_softc *sc)
>         }
>
>         if (!update_sta)
> -               err = iwx_enable_data_tx_queues(sc);
> +               err = iwx_enable_mgmt_queue(sc);
>
>         return err;
>  }
> @@ -7056,19 +7484,10 @@ iwx_disassoc(struct iwx_softc *sc)
>         splassert(IPL_NET);
>
>         if (sc->sc_flags & IWX_FLAG_STA_ACTIVE) {
> -               err = iwx_rm_sta_cmd(sc, in);
> -               if (err) {
> -                       printf("%s: could not remove STA (error %d)\n",
> -                           DEVNAME(sc), err);
> +               err = iwx_rm_sta(sc, in);
> +               if (err)
>                         return err;
> -               }
>                 sc->sc_flags &= ~IWX_FLAG_STA_ACTIVE;
> -               in->in_flags = 0;
> -               sc->sc_rx_ba_sessions = 0;
> -               sc->ba_start_tidmask = 0;
> -               sc->ba_stop_tidmask = 0;
> -               sc->ba_start_tidmask = 0;
> -               sc->ba_stop_tidmask = 0;
>         }
>
>         return 0;
> @@ -7184,6 +7603,15 @@ iwx_run_stop(struct iwx_softc *sc)
>
>         splassert(IPL_NET);
>
> +       if (sc->sc_flags & IWX_FLAG_STA_ACTIVE) {
> +               err = iwx_flush_sta(sc, in);
> +               if (err) {
> +                       printf("%s: could not flush Tx path (error %d)\n",
> +                           DEVNAME(sc), err);
> +                       return err;
> +               }
> +       }
> +
>         err = iwx_sf_config(sc, IWX_SF_INIT_OFF);
>         if (err)
>                 return err;
> @@ -8024,7 +8452,6 @@ iwx_start(struct ifnet *ifp)
>         struct ieee80211_node *ni;
>         struct ether_header *eh;
>         struct mbuf *m;
> -       int ac = EDCA_AC_BE; /* XXX */
>
>         if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
>                 return;
> @@ -8036,6 +8463,10 @@ iwx_start(struct ifnet *ifp)
>                         break;
>                 }
>
> +               /* Don't queue additional frames while flushing Tx queues. */
> +               if (sc->sc_flags & IWX_FLAG_TXFLUSH)
> +                       break;
> +
>                 /* need to send management frames even if we're not RUNning */
>                 m = mq_dequeue(&ic->ic_mgtq);
>                 if (m) {
> @@ -8069,7 +8500,7 @@ iwx_start(struct ifnet *ifp)
>                 if (ic->ic_rawbpf != NULL)
>                         bpf_mtap(ic->ic_rawbpf, m, BPF_DIRECTION_OUT);
>  #endif
> -               if (iwx_tx(sc, m, ni, ac) != 0) {
> +               if (iwx_tx(sc, m, ni) != 0) {
>                         ieee80211_release_node(ic, ni);
>                         ifp->if_oerrors++;
>                         continue;
> @@ -8130,13 +8561,14 @@ iwx_stop(struct ifnet *ifp)
>         sc->sc_flags &= ~IWX_FLAG_TE_ACTIVE;
>         sc->sc_flags &= ~IWX_FLAG_HW_ERR;
>         sc->sc_flags &= ~IWX_FLAG_SHUTDOWN;
> +       sc->sc_flags &= ~IWX_FLAG_TXFLUSH;
>
>         sc->sc_rx_ba_sessions = 0;
> -       sc->ba_start_tidmask = 0;
> -       sc->ba_stop_tidmask = 0;
> -       memset(sc->ba_ssn, 0, sizeof(sc->ba_ssn));
> -       memset(sc->ba_winsize, 0, sizeof(sc->ba_winsize));
> -       memset(sc->ba_timeout_val, 0, sizeof(sc->ba_timeout_val));
> +       sc->ba_rx.start_tidmask = 0;
> +       sc->ba_rx.stop_tidmask = 0;
> +       memset(sc->aggqid, 0, sizeof(sc->aggqid));
> +       sc->ba_tx.start_tidmask = 0;
> +       sc->ba_tx.stop_tidmask = 0;
>
>         sc->sc_newstate(ic, IEEE80211_S_INIT, -1);
>
> @@ -8484,7 +8916,7 @@ iwx_dump_driver_status(struct iwx_softc *sc)
>         int i;
>
>         printf("driver status:\n");
> -       for (i = 0; i < IWX_MAX_QUEUES; i++) {
> +       for (i = 0; i < nitems(sc->txq); i++) {
>                 struct iwx_tx_ring *ring = &sc->txq[i];
>                 printf("  tx ring %2d: qid=%-2d cur=%-3d "
>                     "queued=%-3d\n",
> @@ -8618,6 +9050,10 @@ iwx_rx_pkt(struct iwx_softc *sc, struct iwx_rx_data *d
>                         iwx_rx_tx_cmd(sc, pkt, data);
>                         break;
>
> +               case IWX_BA_NOTIF:
> +                       iwx_rx_compressed_ba(sc, pkt, data);
> +                       break;
> +
>                 case IWX_MISSED_BEACONS_NOTIFICATION:
>                         iwx_rx_bmiss(sc, pkt, data);
>                         break;
> @@ -9424,6 +9860,8 @@ iwx_attach(struct device *parent, struct device *self,
>
>         /* Set device capabilities. */
>         ic->ic_caps =
> +           IEEE80211_C_QOS | IEEE80211_C_TX_AMPDU | /* A-MPDU */
> +           IEEE80211_C_ADDBA_OFFLOAD | /* device sends ADDBA/DELBA frames */
>             IEEE80211_C_WEP |           /* WEP */
>             IEEE80211_C_RSN |           /* WPA/RSN */
>             IEEE80211_C_SCANALL |       /* device scans all channels at once */
> @@ -9497,10 +9935,8 @@ iwx_attach(struct device *parent, struct device *self,
>         ic->ic_updateedca = iwx_updateedca;
>         ic->ic_ampdu_rx_start = iwx_ampdu_rx_start;
>         ic->ic_ampdu_rx_stop = iwx_ampdu_rx_stop;
> -#ifdef notyet
>         ic->ic_ampdu_tx_start = iwx_ampdu_tx_start;
> -       ic->ic_ampdu_tx_stop = iwx_ampdu_tx_stop;
> -#endif
> +       ic->ic_ampdu_tx_stop = NULL;
>         /*
>          * We cannot read the MAC address without loading the
>          * firmware from disk. Postpone until mountroot is done.
> blob - 42dfb8332b03a6085572840789bb1cae76959121
> blob + 34899e0e47f424d4127d7b76ac92bffd7de9ae69
> --- sys/dev/pci/if_iwxreg.h
> +++ sys/dev/pci/if_iwxreg.h
> @@ -1393,9 +1393,6 @@ struct iwx_gen3_bc_tbl {
>         uint16_t tfd_offset[IWX_TFD_QUEUE_BC_SIZE_GEN3];
>  } __packed;
>
> -/* Maximum number of Tx queues. */
> -#define IWX_MAX_QUEUES 31
> -
>  /**
>   * DQA - Dynamic Queue Allocation -introduction
>   *
> @@ -1410,27 +1407,27 @@ struct iwx_gen3_bc_tbl {
>   * some queues that are statically allocated:
>   *     TXQ #0 - command queue
>   *     TXQ #1 - aux frames
> - *     TXQ #2 - P2P device frames
> - *     TXQ #3 - P2P GO/SoftAP GCAST/BCAST frames
> - *     TXQ #4 - BSS DATA frames queue
> - *     TXQ #5-8 - non-QoS data, QoS no-data, and MGMT frames queue pool
> - *     TXQ #9 - P2P GO/SoftAP probe responses
> - *     TXQ #10-31 - QoS DATA frames queue pool (for Tx aggregation)
>   */
>
>  /* static DQA Tx queue numbers */
>  #define IWX_DQA_CMD_QUEUE              0
>  #define IWX_DQA_AUX_QUEUE              1
> -#define IWX_DQA_P2P_DEVICE_QUEUE       2
> -#define IWX_DQA_INJECT_MONITOR_QUEUE   2
> -#define IWX_DQA_GCAST_QUEUE            3
> -#define IWX_DQA_BSS_CLIENT_QUEUE       4
> -#define IWX_DQA_MIN_MGMT_QUEUE         5
> -#define IWX_DQA_MAX_MGMT_QUEUE         8
> -#define IWX_DQA_AP_PROBE_RESP_QUEUE    9
> -#define IWX_DQA_MIN_DATA_QUEUE         10
> -#define IWX_DQA_MAX_DATA_QUEUE         31
>
> +#define IWX_DQA_INJECT_MONITOR_QUEUE   2 /* used in monitor mode only */
> +#define IWX_DQA_MGMT_QUEUE             1 /* default queue other modes */
> +
> +/* Reserve 8 DQA Tx queues for QoS data frames. */
> +#define IWX_MAX_TID_COUNT      8
> +#define IWX_FIRST_AGG_TX_QUEUE (IWX_DQA_MGMT_QUEUE + 1)
> +#define IWX_LAST_AGG_TX_QUEUE  (IWX_FIRST_AGG_TX_QUEUE + IWX_MAX_TID_COUNT - 1)
> +
> +/**
> + * Max Tx window size is the max number of contiguous TFDs that the scheduler
> + * can keep track of at one time when creating block-ack chains of frames.
> + * Note that "64" matches the number of ack bits in a block-ack packet.
> + */
> +#define IWX_FRAME_LIMIT        64
> +
>  #define IWX_TX_FIFO_BK 0
>  #define IWX_TX_FIFO_BE 1
>  #define IWX_TX_FIFO_VI 2
> @@ -4952,7 +4949,6 @@ struct iwx_tlc_update_notif {
>  /*
>   * TID for non QoS frames - to be written in tid_tspec
>   */
> -#define IWX_MAX_TID_COUNT      8
>  #define IWX_TID_NON_QOS        0
>
>  /*
> @@ -5290,35 +5286,96 @@ struct iwx_tx_resp {
>  } __packed; /* TX_RSP_API_S_VER_6 */
>
>  /**
> - * struct iwx_ba_notif - notifies about reception of BA
> - * ( IWX_BA_NOTIF = 0xc5 )
> - * @sta_addr_lo32: lower 32 bits of the MAC address
> - * @sta_addr_hi16: upper 16 bits of the MAC address
> + * struct iwx_compressed_ba_tfd - progress of a TFD queue
> + * @q_num: TFD queue number
> + * @tfd_index: Index of first un-acked frame in the  TFD queue
> + * @scd_queue: For debug only - the physical queue the TFD queue is bound to
> + * @tid: TID of the queue (0-7)
> + * @reserved: reserved for alignment
> + */
> +struct iwx_compressed_ba_tfd {
> +       uint16_t q_num;
> +       uint16_t tfd_index;
> +       uint8_t scd_queue;
> +       uint8_t tid;
> +       uint8_t reserved[2];
> +} __packed; /* COMPRESSED_BA_TFD_API_S_VER_1 */
> +
> +/**
> + * struct iwx_compressed_ba_ratid - progress of a RA TID queue
> + * @q_num: RA TID queue number
> + * @tid: TID of the queue
> + * @ssn: BA window current SSN
> + */
> +struct iwx_compressed_ba_ratid {
> +       uint8_t q_num;
> +       uint8_t tid;
> +       uint16_t ssn;
> +} __packed; /* COMPRESSED_BA_RATID_API_S_VER_1 */
> +
> +/*
> + * enum iwx_ba_resp_flags - TX aggregation status
> + * @IWX_MVM_BA_RESP_TX_AGG: generated due to BA
> + * @IWX_MVM_BA_RESP_TX_BAR: generated due to BA after BAR
> + * @IWX_MVM_BA_RESP_TX_AGG_FAIL: aggregation didn't receive BA
> + * @IWX_MVM_BA_RESP_TX_UNDERRUN: aggregation got underrun
> + * @IWX_MVM_BA_RESP_TX_BT_KILL: aggregation got BT-kill
> + * @IWX_MVM_BA_RESP_TX_DSP_TIMEOUT: aggregation didn't finish within the
> + *     expected time
> + */
> +enum iwx_ba_resp_flags {
> +       IWX_MVM_BA_RESP_TX_AGG,
> +       IWX_MVM_BA_RESP_TX_BAR,
> +       IWX_MVM_BA_RESP_TX_AGG_FAIL,
> +       IWX_MVM_BA_RESP_TX_UNDERRUN,
> +       IWX_MVM_BA_RESP_TX_BT_KILL,
> +       IWX_MVM_BA_RESP_TX_DSP_TIMEOUT
> +};
> +
> +/**
> + * struct iwx_compressed_ba_notif - notifies about reception of BA
> + * ( BA_NOTIF = 0xc5 )
> + * @flags: status flag, see the &iwx_ba_resp_flags
>   * @sta_id: Index of recipient (BA-sending) station in fw's station table
> - * @tid: tid of the session
> - * @seq_ctl:
> - * @bitmap: the bitmap of the BA notification as seen in the air
> - * @scd_flow: the tx queue this BA relates to
> - * @scd_ssn: the index of the last contiguously sent packet
> - * @txed: number of Txed frames in this batch
> - * @txed_2_done: number of Acked frames in this batch
> + * @reduced_txp: power reduced according to TPC. This is the actual value and
> + *     not a copy from the LQ command. Thus, if not the first rate was used
> + *     for Tx-ing then this value will be set to 0 by FW.
> + * @tlc_rate_info: TLC rate info, initial rate index, TLC table color
> + * @retry_cnt: retry count
> + * @query_byte_cnt: SCD query byte count
> + * @query_frame_cnt: SCD query frame count
> + * @txed: number of frames sent in the aggregation (all-TIDs)
> + * @done: number of frames that were Acked by the BA (all-TIDs)
> + * @reserved: reserved (for alignment)
> + * @wireless_time: Wireless-media time
> + * @tx_rate: the rate the aggregation was sent at
> + * @tfd_cnt: number of TFD-Q elements
> + * @ra_tid_cnt: number of RATID-Q elements
> + * @tfd: array of TFD queue status updates. See &iwx_compressed_ba_tfd
> + *     for details. Length in @tfd_cnt.
> + * @ra_tid: array of RA-TID queue status updates. For debug purposes only. See
> + *     &iwx_compressed_ba_ratid for more details. Length in @ra_tid_cnt.
>   */
> -struct iwx_ba_notif {
> -       uint32_t sta_addr_lo32;
> -       uint16_t sta_addr_hi16;
> -       uint16_t reserved;
> -
> +struct iwx_compressed_ba_notif {
> +       uint32_t flags;
>         uint8_t sta_id;
> -       uint8_t tid;
> -       uint16_t seq_ctl;
> -       uint64_t bitmap;
> -       uint16_t scd_flow;
> -       uint16_t scd_ssn;
> -       uint8_t txed;
> -       uint8_t txed_2_done;
> -       uint16_t reserved1;
> -} __packed;
> +       uint8_t reduced_txp;
> +       uint8_t tlc_rate_info;
> +       uint8_t retry_cnt;
> +       uint32_t query_byte_cnt;
> +       uint16_t query_frame_cnt;
> +       uint16_t txed;
> +       uint16_t done;
> +       uint16_t reserved;
> +       uint32_t wireless_time;
> +       uint32_t tx_rate;
> +       uint16_t tfd_cnt;
> +       uint16_t ra_tid_cnt;
> +       struct iwx_compressed_ba_ratid ra_tid[0];
> +       struct iwx_compressed_ba_tfd tfd[];
> +} __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */
>
> +
>  struct iwx_beacon_notif {
>         struct iwx_tx_resp_v3 beacon_notify_hdr;
>         uint64_t tsf;
> @@ -5356,7 +5413,34 @@ struct iwx_tx_path_flush_cmd {
>         uint16_t reserved;
>  } __packed; /* TX_PATH_FLUSH_CMD_API_S_VER_2 */
>
> +#define IWX_TX_FLUSH_QUEUE_RSP 16
> +
>  /**
> + * struct iwx_flush_queue_info - virtual flush queue info
> + * @queue_num: virtual queue id
> + * @read_before_flush: read pointer before flush
> + * @read_after_flush: read pointer after flush
> + */
> +struct iwx_flush_queue_info {
> +       uint16_t tid;
> +       uint16_t queue_num;
> +       uint16_t read_before_flush;
> +       uint16_t read_after_flush;
> +} __packed; /* TFDQ_FLUSH_INFO_API_S_VER_1 */
> +
> +/**
> + * struct iwx_tx_path_flush_cmd_rsp -- queue/FIFO flush command response
> + * @num_flushed_queues: number of queues in queues array
> + * @queues: all flushed queues
> + */
> +struct iwx_tx_path_flush_cmd_rsp {
> +       uint16_t sta_id;
> +       uint16_t num_flushed_queues;
> +       struct iwx_flush_queue_info queues[IWX_TX_FLUSH_QUEUE_RSP];
> +} __packed; /* TX_PATH_FLUSH_CMD_RSP_API_S_VER_1 */
> +
> +
> +/**
>   * iwx_get_scd_ssn - returns the SSN of the SCD
>   * @tx_resp: the Tx response from the fw (agg or non-agg)
>   *
> blob - 3eb5974fc21e11962cdbbb08cd66c1451e0acd30
> blob + be876dc0b0768a84d9da2ee144efe2b6f3701d06
> --- sys/dev/pci/if_iwxvar.h
> +++ sys/dev/pci/if_iwxvar.h
> @@ -224,9 +224,6 @@ struct iwx_dma_info {
>  #define IWX_TX_RING_LOMARK     192
>  #define IWX_TX_RING_HIMARK     224
>
> -/* For aggregation queues, index must be aligned to frame sequence number. */
> -#define IWX_AGG_SSN_TO_TXQ_IDX(x)      ((x) & (IWX_TX_RING_COUNT - 1))
> -
>  struct iwx_tx_data {
>         bus_dmamap_t    map;
>         bus_addr_t      cmd_paddr;
> @@ -247,6 +244,7 @@ struct iwx_tx_ring {
>         int                     queued;
>         int                     cur;
>         int                     tail;
> +       int                     tid;
>  };
>
>  #define IWX_RX_MQ_RING_COUNT   512
> @@ -279,6 +277,7 @@ struct iwx_rx_ring {
>  #define IWX_FLAG_HW_ERR                0x80    /* hardware error occurred */
>  #define IWX_FLAG_SHUTDOWN      0x100   /* shutting down; new tasks forbidden */
>  #define IWX_FLAG_BGSCAN                0x200   /* background scan in progress */
> +#define IWX_FLAG_TXFLUSH       0x400   /* Tx queue flushing in progress */
>
>  struct iwx_ucode_status {
>         uint32_t uc_lmac_error_event_table[2];
> @@ -445,6 +444,11 @@ struct iwx_setkey_task_arg {
>         struct ieee80211_key *k;
>  };
>
> +struct iwx_ba_task_data {
> +       uint32_t                start_tidmask;
> +       uint32_t                stop_tidmask;
> +};
> +
>  struct iwx_softc {
>         struct device sc_dev;
>         struct ieee80211com sc_ic;
> @@ -459,11 +463,8 @@ struct iwx_softc {
>
>         /* Task for firmware BlockAck setup/teardown and its arguments. */
>         struct task             ba_task;
> -       uint32_t                ba_start_tidmask;
> -       uint32_t                ba_stop_tidmask;
> -       uint16_t                ba_ssn[IWX_MAX_TID_COUNT];
> -       uint16_t                ba_winsize[IWX_MAX_TID_COUNT];
> -       int                     ba_timeout_val[IWX_MAX_TID_COUNT];
> +       struct iwx_ba_task_data ba_rx;
> +       struct iwx_ba_task_data ba_tx;
>
>         /* Task for setting encryption keys and its arguments. */
>         struct task             setkey_task;
> @@ -492,10 +493,12 @@ struct iwx_softc {
>         int sc_msix;
>
>         /* TX/RX rings. */
> -       struct iwx_tx_ring txq[IWX_MAX_QUEUES];
> +       struct iwx_tx_ring txq[IWX_LAST_AGG_TX_QUEUE];
>         struct iwx_rx_ring rxq;
>         int qfullmsk;
> +       int qenablemsk;
>         int first_data_qid;
> +       int aggqid[IEEE80211_NUM_TID];
>
>         int sc_sf_state;
>
> blob - bf7abf058b86c4b0ad402a22c2e3f3c35e81788e
> blob + b3b30f1f98f689c6d066cff04b07b938fde0cc68
> --- sys/net80211/ieee80211_proto.c
> +++ sys/net80211/ieee80211_proto.c
> @@ -700,6 +700,18 @@ ieee80211_addba_request(struct ieee80211com *ic, struc
>                 /* immediate BA */
>                 ba->ba_params |= IEEE80211_ADDBA_BA_POLICY;
>
> +       if ((ic->ic_caps & IEEE80211_C_ADDBA_OFFLOAD) &&
> +           ic->ic_ampdu_tx_start != NULL) {
> +               int err = ic->ic_ampdu_tx_start(ic, ni, tid);
> +               if (err && err != EBUSY) {
> +                       /* driver failed to setup, rollback */
> +                       ieee80211_addba_resp_refuse(ic, ni, tid,
> +                           IEEE80211_STATUS_UNSPECIFIED);
> +               } else if (err == 0)
> +                       ieee80211_addba_resp_accept(ic, ni, tid);
> +               return err; /* The device will send an ADDBA frame. */
> +       }
> +
>         timeout_add_sec(&ba->ba_to, 1); /* dot11ADDBAResponseTimeout */
>         IEEE80211_SEND_ACTION(ic, ni, IEEE80211_CATEG_BA,
>             IEEE80211_ACTION_ADDBA_REQ, tid);
> @@ -948,6 +960,13 @@ ieee80211_stop_ampdu_tx(struct ieee80211com *ic, struc
>                 struct ieee80211_tx_ba *ba = &ni->ni_tx_ba[tid];
>                 if (ba->ba_state != IEEE80211_BA_AGREED)
>                         continue;
> +
> +               if (ic->ic_caps & IEEE80211_C_ADDBA_OFFLOAD) {
> +                       if (ic->ic_ampdu_tx_stop != NULL)
> +                               ic->ic_ampdu_tx_stop(ic, ni, tid);
> +                       continue; /* Don't change ba->ba_state! */
> +               }
> +
>                 ieee80211_delba_request(ic, ni,
>                     mgt == -1 ? 0 : IEEE80211_REASON_AUTH_LEAVE, 1, tid);
>         }
> blob - 334439887ffe54232e2ccc488b975f1cda5a26a3
> blob + 0ca42aaf5b0af1be78d69cc692c02bc9a35b123a
> --- sys/net80211/ieee80211_var.h
> +++ sys/net80211/ieee80211_var.h
> @@ -432,6 +432,7 @@ struct ieee80211_ess {
>  #define IEEE80211_C_RAWCTL     0x00004000      /* CAPABILITY: raw ctl */
>  #define IEEE80211_C_SCANALLBAND        0x00008000      /* CAPABILITY: scan all bands */
>  #define IEEE80211_C_TX_AMPDU   0x00010000      /* CAPABILITY: send A-MPDU */
> +#define IEEE80211_C_ADDBA_OFFLOAD 0x00020000   /* CAPABILITY: ADDBA offload */
>
>  /* flags for ieee80211_fix_rate() */
>  #define        IEEE80211_F_DOSORT      0x00000001      /* sort rate list */
>
