ral(4) cards using the rt2661 driver code (RT2561, RT2561S, RT2661
variants) suffer a race in TX interrupt handling which can cause
TX processing to get stuck.

This problem was previously discussed here:
http://marc.info/?l=openbsd-misc&m=125895269930106&w=2
The patch proposed there was rejected by Damien for reasons unknown to me:
http://marc.info/?l=openbsd-tech&m=126451543507085&w=2

Sepherosa Ziehau from dragonfly fixed this problem in a different way:
http://gitweb.dragonflybsd.org/dragonfly.git/commit/9dd87f8a6f730e54dfa4f21f3d9fae6a615b1908

My approach for a fix is based on Sephe's approach.

With 2661, TX interrupt processing is split between two interrupt handlers.
One that runs when the chip has offloaded the frame to the ASIC
(rt2661_tx_dma_intr) and one that runs when the ASIC reports back
about transmission success or failure (rt2661_tx_intr).

Currently, we free the mbuf for the transmitted frame during the
first interrupt (rt2661_tx_dma_intr), but do not release the STA
node reference yet and do not make space in the TX queue.

When the second interrupt handler (rt2661_tx_intr) runs, we obtain the
STA node corresponding to the transmitted frame, and update the STA node's
associated AMRR node, which contains counters used by the rate adaptation
code ("Adaptive Multi Rate Retry" algorithm).
Then space is made in the TX queue.

This approach assumes that rt2661_tx_intr and rt2661_tx_dma_intr will always
be triggered in the right amount and order. This assumption doesn't seem
to be safe. I've seen the driver end up in a situation where some entries
in the TX queue are not removed at all.

To fix this, we can keep track of which AMRR node needs to be updated when
the second interrupt handler gets to run. By not tying the AMRR node to the
STA node directly, we can release the STA node reference in the first
interrupt handler and make room in the TX queue there.

The hardware provides an 8-bit field for driver-specific data in the
TX descriptor, which can be written before TX and read back when TX
completes. This field is currently used to store the ID of the queue
a frame was transmitted from, and read back in rt2661_tx_intr to obtain,
via the queue, the STA node corresponding to the transmitted frame.

Instead of storing a queue ID, we can store an identifier for the AMRR
node that needs to be updated when the TX has completed (in case the
STA node has already been freed when the second interrupt occurs,
we free the corresponding AMRR node instead of updating it).

With the patch below I've haven't seen my ral AP lock up in 2 days,
whereas before I could trigger the problem simply by taking the laptop
to my kitchen, where signal to the AP is poor.

Can anyone confirm that this patch provides similar benefits elsewhere?
If your ral(4) device reports itself as "Ralink RT2561", "Ralink RT2561S",
or "Ralink RT2661", please test and report back. Thanks!

Index: rt2661.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/rt2661.c,v
retrieving revision 1.67
diff -u -p -r1.67 rt2661.c
--- rt2661.c    17 Jul 2012 14:43:12 -0000      1.67
+++ rt2661.c    18 Jul 2012 13:38:39 -0000
@@ -34,6 +34,7 @@
 #include <sys/timeout.h>
 #include <sys/conf.h>
 #include <sys/device.h>
+#include <sys/queue.h>
 
 #include <machine/bus.h>
 #include <machine/endian.h>
@@ -57,6 +58,7 @@
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_amrr.h>
 #include <net80211/ieee80211_radiotap.h>
+#include <net80211/ieee80211_node.h>
 
 #include <dev/ic/rt2661var.h>
 #include <dev/ic/rt2661reg.h>
@@ -88,6 +90,8 @@ void          rt2661_reset_rx_ring(struct rt2661
 void           rt2661_free_rx_ring(struct rt2661_softc *,
                    struct rt2661_rx_ring *);
 struct         ieee80211_node *rt2661_node_alloc(struct ieee80211com *);
+void           rt2661_node_free(struct ieee80211com *,
+                   struct ieee80211_node *);
 int            rt2661_media_change(struct ifnet *);
 void           rt2661_next_scan(void *);
 void           rt2661_iter_func(void *, struct ieee80211_node *);
@@ -115,7 +119,7 @@ uint16_t    rt2661_txtime(int, int, uint32_
 uint8_t                rt2661_plcp_signal(int);
 void           rt2661_setup_tx_desc(struct rt2661_softc *,
                    struct rt2661_tx_desc *, uint32_t, uint16_t, int, int,
-                   const bus_dma_segment_t *, int, int);
+                   const bus_dma_segment_t *, int, int, u_int8_t);
 int            rt2661_tx_mgt(struct rt2661_softc *, struct mbuf *,
                    struct ieee80211_node *);
 int            rt2661_tx_data(struct rt2661_softc *, struct mbuf *,
@@ -156,6 +160,13 @@ int                rt2661_prepare_beacon(struct rt2661
 #endif
 void           rt2661_enable_tsf_sync(struct rt2661_softc *);
 int            rt2661_get_rssi(struct rt2661_softc *, uint8_t);
+struct         rt2661_amrr_node *rt2661_amrr_node_alloc(struct ieee80211com *,
+                   struct rt2661_node *);
+void           rt2661_amrr_node_free(struct rt2661_softc *,
+                   struct rt2661_amrr_node *);
+void           rt2661_amrr_node_free_all(struct rt2661_softc *);
+struct                 rt2661_amrr_node *rt2661_amrr_node_find(struct 
rt2661_softc *,
+                   u_int8_t);
 
 static const struct {
        uint32_t        reg;
@@ -195,6 +206,8 @@ rt2661_attach(void *xsc, int id)
        timeout_set(&sc->amrr_to, rt2661_updatestats, sc);
        timeout_set(&sc->scan_to, rt2661_next_scan, sc);
 
+       TAILQ_INIT(&sc->amn);
+
        /* wait for NIC to initialize */
        for (ntries = 0; ntries < 1000; ntries++) {
                if ((val = RAL_READ(sc, RT2661_MAC_CSR0)) != 0)
@@ -344,6 +357,8 @@ rt2661_attachhook(void *xsc)
        if_attach(ifp);
        ieee80211_ifattach(ifp);
        ic->ic_node_alloc = rt2661_node_alloc;
+       sc->sc_node_free = ic->ic_node_free;
+       ic->ic_node_free = rt2661_node_free;
        ic->ic_newassoc = rt2661_newassoc;
        ic->ic_updateslot = rt2661_updateslot;
 
@@ -377,6 +392,7 @@ rt2661_detach(void *xsc)
        timeout_del(&sc->amrr_to);
 
        ieee80211_ifdetach(ifp);        /* free all nodes */
+       rt2661_amrr_node_free_all(sc);
        if_detach(ifp);
 
        for (ac = 0; ac < 4; ac++)
@@ -705,11 +721,86 @@ rt2661_free_rx_ring(struct rt2661_softc 
        }
 }
 
+struct rt2661_amrr_node *
+rt2661_amrr_node_alloc(struct ieee80211com *ic, struct rt2661_node *rn)
+{
+       struct rt2661_softc *sc = ic->ic_softc;
+       struct rt2661_amrr_node *amn;
+
+       if (sc->amn_count >= RT2661_AMRR_NODES_MAX)
+               return NULL;
+
+       amn = malloc(sizeof (struct rt2661_amrr_node), M_DEVBUF,
+           M_NOWAIT | M_ZERO);
+
+       if (amn) {
+               amn->id = sc->amn_count++;
+               amn->rn = rn;
+               TAILQ_INSERT_TAIL(&sc->amn, amn, entry);
+       }
+
+       return amn;
+}
+
+void
+rt2661_amrr_node_free(struct rt2661_softc *sc, struct rt2661_amrr_node *amn)
+{
+       TAILQ_REMOVE(&sc->amn, amn, entry);
+       sc->amn_count--;
+       free(amn, M_DEVBUF);
+}
+
+void
+rt2661_amrr_node_free_all(struct rt2661_softc *sc)
+{
+       struct rt2661_amrr_node *amn, *a;
+
+       TAILQ_FOREACH_SAFE(amn, &sc->amn, entry, a)
+               rt2661_amrr_node_free(sc, amn);
+}
+
+struct rt2661_amrr_node *
+rt2661_amrr_node_find(struct rt2661_softc *sc, u_int8_t id)
+{
+       struct rt2661_amrr_node *amn, *a, *ret = NULL;
+
+       if (id == RT2661_AMRR_INVALID_ID)
+               return NULL;
+
+       TAILQ_FOREACH_SAFE(amn, &sc->amn, entry, a) {
+               /* If the corresponding node was freed, free the amrr node. */
+               if (amn->rn == NULL)
+                       rt2661_amrr_node_free(sc, amn);
+               else if (amn->id == id)
+                       ret = amn;
+       }
+
+       return ret;
+}
+
 struct ieee80211_node *
 rt2661_node_alloc(struct ieee80211com *ic)
 {
-       return malloc(sizeof (struct rt2661_node), M_DEVBUF,
+       struct rt2661_node *rn;
+
+       rn = malloc(sizeof (struct rt2661_node), M_DEVBUF,
            M_NOWAIT | M_ZERO);
+       if (rn == NULL)
+               return NULL;
+
+       rn->amn = rt2661_amrr_node_alloc(ic, rn);
+       return (struct ieee80211_node *)rn;
+}
+
+void
+rt2661_node_free(struct ieee80211com *ic, struct ieee80211_node *ni)
+{
+       struct rt2661_softc *sc = ic->ic_softc;
+       struct rt2661_node *rn = (struct rt2661_node *)ni;
+
+       if (rn->amn)
+               rn->amn->rn = NULL;
+       sc->sc_node_free(ic, ni);
 }
 
 int
@@ -754,7 +845,8 @@ rt2661_iter_func(void *arg, struct ieee8
        struct rt2661_softc *sc = arg;
        struct rt2661_node *rn = (struct rt2661_node *)ni;
 
-       ieee80211_amrr_choose(&sc->amrr, ni, &rn->amn);
+       if (rn->amn)
+               ieee80211_amrr_choose(&sc->amrr, ni, &rn->amn->amn);
 }
 
 /*
@@ -788,7 +880,8 @@ rt2661_newassoc(struct ieee80211com *ic,
        struct rt2661_softc *sc = ic->ic_softc;
        int i;
 
-       ieee80211_amrr_node_init(&sc->amrr, &((struct rt2661_node *)ni)->amn);
+       ieee80211_amrr_node_init(&sc->amrr,
+           &((struct rt2661_node *)ni)->amn->amn);
 
        /* set rate to some reasonable initial value */
        for (i = ni->ni_rates.rs_nrates - 1;
@@ -922,32 +1015,25 @@ rt2661_eeprom_read(struct rt2661_softc *
        return val;
 }
 
+/* The TX interrupt handler accumulates statistics based on whether frames
+ * were sent successfully by the ASIC. */
 void
 rt2661_tx_intr(struct rt2661_softc *sc)
 {
        struct ieee80211com *ic = &sc->sc_ic;
        struct ifnet *ifp = &ic->ic_if;
-       struct rt2661_tx_ring *txq;
-       struct rt2661_tx_data *data;
-       struct rt2661_node *rn;
-       int qid, retrycnt;
+       struct rt2661_amrr_node *amn;
+       int retrycnt;
+       u_int8_t amrr_id;
 
        for (;;) {
                const uint32_t val = RAL_READ(sc, RT2661_STA_CSR4);
                if (!(val & RT2661_TX_STAT_VALID))
                        break;
 
-               /* retrieve the queue in which this frame was sent */
-               qid = RT2661_TX_QID(val);
-               txq = (qid <= 3) ? &sc->txq[qid] : &sc->mgtq;
-
                /* retrieve rate control algorithm context */
-               data = &txq->data[txq->stat];
-               rn = (struct rt2661_node *)data->ni;
-
-               /* if no frame has been sent, ignore */
-               if (rn == NULL)
-                       continue;
+               amrr_id = RT2661_TX_PRIV_DATA(val);
+               amn = rt2661_amrr_node_find(sc, amrr_id);
 
                switch (RT2661_TX_RESULT(val)) {
                case RT2661_TX_SUCCESS:
@@ -955,17 +1041,21 @@ rt2661_tx_intr(struct rt2661_softc *sc)
 
                        DPRINTFN(10, ("data frame sent successfully after "
                            "%d retries\n", retrycnt));
-                       rn->amn.amn_txcnt++;
-                       if (retrycnt > 0)
-                               rn->amn.amn_retrycnt++;
+                       if (amn) {
+                               amn->amn.amn_txcnt++;
+                               if (retrycnt > 0)
+                                       amn->amn.amn_retrycnt++;
+                       }
                        ifp->if_opackets++;
                        break;
 
                case RT2661_TX_RETRY_FAIL:
                        DPRINTFN(9, ("sending data frame failed (too much "
                            "retries)\n"));
-                       rn->amn.amn_txcnt++;
-                       rn->amn.amn_retrycnt++;
+                       if (amn) {
+                               amn->amn.amn_txcnt++;
+                               amn->amn.amn_retrycnt++;
+                       }
                        ifp->if_oerrors++;
                        break;
 
@@ -976,24 +1066,19 @@ rt2661_tx_intr(struct rt2661_softc *sc)
                        ifp->if_oerrors++;
                }
 
-               ieee80211_release_node(ic, data->ni);
-               data->ni = NULL;
-
-               DPRINTFN(15, ("tx done q=%d idx=%u\n", qid, txq->stat));
-
-               txq->queued--;
-               if (++txq->stat >= txq->count)  /* faster than % count */
-                       txq->stat = 0;
+               DPRINTFN(15, ("tx done amrr_id=%hhu amn=0x%x\n", amrr_id, amn));
        }
-
-       sc->sc_tx_timer = 0;
-       ifp->if_flags &= ~IFF_OACTIVE;
-       rt2661_start(ifp);
 }
 
+/* The TX DMA interrupt handler processes frames which have been offloaded
+ * to the ASIC for transmission. We can free all resources corresponding
+ * to the frame here. */
 void
 rt2661_tx_dma_intr(struct rt2661_softc *sc, struct rt2661_tx_ring *txq)
 {
+       struct ieee80211com *ic = &sc->sc_ic;
+       struct ifnet *ifp = &ic->ic_if;
+
        for (;;) {
                struct rt2661_tx_desc *desc = &txq->desc[txq->next];
                struct rt2661_tx_data *data = &txq->data[txq->next];
@@ -1018,13 +1103,28 @@ rt2661_tx_dma_intr(struct rt2661_softc *
                bus_dmamap_unload(sc->sc_dmat, data->map);
                m_freem(data->m);
                data->m = NULL;
-               /* node reference is released in rt2661_tx_intr() */
+               ieee80211_release_node(ic, data->ni);
+               data->ni = NULL;
 
                DPRINTFN(15, ("tx dma done q=%p idx=%u\n", txq, txq->next));
 
+               txq->queued--;
                if (++txq->next >= txq->count)  /* faster than % count */
                        txq->next = 0;
        }
+
+       if (sc->mgtq.queued == 0 && sc->txq[0].queued == 0)
+               sc->sc_tx_timer = 0;
+       if (sc->mgtq.queued < RT2661_MGT_RING_COUNT &&
+           sc->txq[0].queued < RT2661_TX_RING_COUNT - 1) {
+               if (sc->mgtq.queued < RT2661_MGT_RING_COUNT)
+                       sc->sc_flags &= ~RT2661_MGT_OACTIVE;
+               if (sc->txq[0].queued < RT2661_TX_RING_COUNT - 1)
+                       sc->sc_flags &= ~RT2661_DATA_OACTIVE;
+               if (!(sc->sc_flags & (RT2661_MGT_OACTIVE|RT2661_DATA_OACTIVE)))
+                       ifp->if_flags &= ~IFF_OACTIVE;
+               rt2661_start(ifp);
+       }
 }
 
 void
@@ -1421,7 +1521,7 @@ rt2661_plcp_signal(int rate)
 void
 rt2661_setup_tx_desc(struct rt2661_softc *sc, struct rt2661_tx_desc *desc,
     uint32_t flags, uint16_t xflags, int len, int rate,
-    const bus_dma_segment_t *segs, int nsegs, int ac)
+    const bus_dma_segment_t *segs, int nsegs, int ac, u_int8_t amrr_id)
 {
        struct ieee80211com *ic = &sc->sc_ic;
        uint16_t plcp_length;
@@ -1441,11 +1541,11 @@ rt2661_setup_tx_desc(struct rt2661_softc
            RT2661_LOGCWMAX(10));
 
        /*
-        * Remember in which queue this frame was sent. This field is driver
-        * private data only. It will be made available by the NIC in STA_CSR4
-        * on Tx interrupts.
+        * Remember the ID of the AMRR node to update when Tx completes.
+        * This field is driver private data only. It will be made available
+        * by the NIC in STA_CSR4 on Tx interrupts.
         */
-       desc->qid = ac;
+       desc->priv_data = amrr_id;
 
        /* setup PLCP fields */
        desc->plcp_signal  = rt2661_plcp_signal(rate);
@@ -1549,7 +1649,7 @@ rt2661_tx_mgt(struct rt2661_softc *sc, s
 
        rt2661_setup_tx_desc(sc, desc, flags, 0 /* XXX HWSEQ */,
            m0->m_pkthdr.len, rate, data->map->dm_segs, data->map->dm_nsegs,
-           RT2661_QID_MGT);
+           RT2661_QID_MGT, RT2661_AMRR_INVALID_ID);
 
        bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize,
            BUS_DMASYNC_PREWRITE);
@@ -1574,6 +1674,7 @@ rt2661_tx_data(struct rt2661_softc *sc, 
 {
        struct ieee80211com *ic = &sc->sc_ic;
        struct rt2661_tx_ring *txq = &sc->txq[ac];
+       struct rt2661_node *rn;
        struct rt2661_tx_desc *desc;
        struct rt2661_tx_data *data;
        struct ieee80211_frame *wh;
@@ -1583,6 +1684,7 @@ rt2661_tx_data(struct rt2661_softc *sc, 
        uint32_t flags = 0;
        int pktlen, rate, needcts = 0, needrts = 0, error;
 
+       rn = ((ni == ic->ic_bss) ? NULL : (struct rt2661_node *)ni);
        wh = mtod(m0, struct ieee80211_frame *);
 
        if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
@@ -1677,7 +1779,8 @@ rt2661_tx_data(struct rt2661_softc *sc, 
                rt2661_setup_tx_desc(sc, desc,
                    (needrts ? RT2661_TX_NEED_ACK : 0) | RT2661_TX_MORE_FRAG,
                    0, mprot->m_pkthdr.len, protrate, data->map->dm_segs,
-                   data->map->dm_nsegs, ac);
+                   data->map->dm_nsegs, ac,
+                   (rn && rn->amn) ? rn->amn->id : RT2661_AMRR_INVALID_ID);
 
                bus_dmamap_sync(sc->sc_dmat, data->map, 0,
                    data->map->dm_mapsize, BUS_DMASYNC_PREWRITE);
@@ -1767,7 +1870,8 @@ rt2661_tx_data(struct rt2661_softc *sc, 
        }
 
        rt2661_setup_tx_desc(sc, desc, flags, 0, m0->m_pkthdr.len, rate,
-           data->map->dm_segs, data->map->dm_nsegs, ac);
+           data->map->dm_segs, data->map->dm_nsegs, ac,
+           (rn && rn->amn) ? rn->amn->id : RT2661_AMRR_INVALID_ID);
 
        bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize,
            BUS_DMASYNC_PREWRITE);
@@ -2606,6 +2710,7 @@ rt2661_stop(struct ifnet *ifp, int disab
        ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
 
        ieee80211_new_state(ic, IEEE80211_S_INIT, -1);  /* free all nodes */
+       rt2661_amrr_node_free_all(sc);
 
        /* abort Tx (for all 5 Tx rings) */
        RAL_WRITE(sc, RT2661_TX_CNTL_CSR, 0x1f << 16);
@@ -2816,7 +2921,8 @@ rt2661_prepare_beacon(struct rt2661_soft
        rate = IEEE80211_IS_CHAN_5GHZ(ni->ni_chan) ? 12 : 2;
 
        rt2661_setup_tx_desc(sc, &desc, RT2661_TX_TIMESTAMP, RT2661_TX_HWSEQ,
-           m0->m_pkthdr.len, rate, NULL, 0, RT2661_QID_MGT);
+           m0->m_pkthdr.len, rate, NULL, 0, RT2661_QID_MGT,
+           RT2661_AMRR_INVALID_ID);
 
        /* copy the first 24 bytes of Tx descriptor into NIC memory */
        RAL_WRITE_REGION_1(sc, RT2661_HW_BEACON_BASE0, (uint8_t *)&desc, 24);
Index: rt2661reg.h
===================================================================
RCS file: /cvs/src/sys/dev/ic/rt2661reg.h,v
retrieving revision 1.11
diff -u -p -r1.11 rt2661reg.h
--- rt2661reg.h 14 Feb 2010 09:20:34 -0000      1.11
+++ rt2661reg.h 17 Jul 2012 14:03:17 -0000
@@ -189,7 +189,9 @@
 #define RT2661_TX_STAT_VALID   (1 << 0)
 #define RT2661_TX_RESULT(v)    (((v) >> 1) & 0x7)
 #define RT2661_TX_RETRYCNT(v)  (((v) >> 4) & 0xf)
-#define RT2661_TX_QID(v)       (((v) >> 8) & 0xf)
+/* Driver-private data written before TX and read back when TX completes.
+ * We store the driver-private ID of an AMRR node in here. */
+#define RT2661_TX_PRIV_DATA(v) (((v) >> 8) & 0xff)
 #define RT2661_TX_SUCCESS      0
 #define RT2661_TX_RETRY_FAIL   6
 
@@ -245,7 +247,7 @@ struct rt2661_tx_desc {
        uint32_t        eiv;
 
        uint8_t         offset;
-       uint8_t         qid;
+       uint8_t         priv_data;
 #define RT2661_QID_MGT 13
 
        uint8_t         txpower;
Index: rt2661var.h
===================================================================
RCS file: /cvs/src/sys/dev/ic/rt2661var.h,v
retrieving revision 1.16
diff -u -p -r1.16 rt2661var.h
--- rt2661var.h 17 Jul 2012 14:43:12 -0000      1.16
+++ rt2661var.h 17 Jul 2012 16:10:42 -0000
@@ -82,9 +82,19 @@ struct rt2661_rx_ring {
        int                     next;
 };
 
+#define RT2661_AMRR_NODES_MAX  100 /* based on IEEE80211_CACHE_SIZE */
+#define RT2661_AMRR_INVALID_ID (RT2661_AMRR_NODES_MAX + 1)
+
+struct rt2661_amrr_node {
+       struct ieee80211_amrr_node      amn;
+       struct rt2661_node              *rn;
+       u_int8_t                        id;
+       TAILQ_ENTRY(rt2661_amrr_node)   entry;
+};
+
 struct rt2661_node {
        struct ieee80211_node           ni;
-       struct ieee80211_amrr_node      amn;
+       struct rt2661_amrr_node         *amn;
 };
 
 struct rt2661_softc {
@@ -111,6 +121,8 @@ struct rt2661_softc {
 #define RT2661_UPDATE_SLOT     (1 << 1)
 #define RT2661_SET_SLOTTIME    (1 << 2)
 #define RT2661_FWLOADED                (1 << 3)
+#define RT2661_MGT_OACTIVE     (1 << 4)
+#define RT2661_DATA_OACTIVE    (1 << 5)
 
        int                             sc_tx_timer;
 
@@ -175,6 +187,10 @@ struct rt2661_softc {
 #define sc_txtap                       sc_txtapu.th
        int                             sc_txtap_len;
 #endif
+       void                            (*sc_node_free)(struct ieee80211com *,
+                                           struct ieee80211_node *);
+       TAILQ_HEAD(, rt2661_amrr_node)  amn;
+       u_int8_t                        amn_count;
 };
 
 int    rt2661_attach(void *, int);

Reply via email to