On Mon, Mar 24, 2014 at 06:35:29PM -0700, andy wrote: > hello - > > i've been using a soekris net5501 as a home gateway since early 2008, > starting w/openbsd 4.2 and upgrading through 5.4. for most of that time > it's also been serving as a wireless access point. the wireless card is > a SparkLAN WMIR-168AG WLAN 802.11a/b/g Mini PCI Module with the Ralink > RT2561T chipset (ral driver; dmesg.boot attached). the system has been > working reliably for years. > > however, the box started to hang within days of upgrading to 5.4. it > stays responsive for a variable length of time after reboot, ranging > from minutes to a week or more (but not much more). and unfortunately, > it hangs w/o writing anything to syslog or serial console. i enabled > ddb.console in sysctl.conf but found it to be completely unresponsive > when hung (i successfully tested sending a break in normal operation).
The diff below backs out my changes for ral from 5.3->5.4. Can you test this? I doubt it will have any effect, but if it does I'd very much like to know about it. > i've merged the patches from the 5.4 release errata & patch list and > rebuilt the os to no effect. there was some correlation between the > hangs and increased wireless usage; i tried disabling pf and squid but > the hangs continued. eventually i ran `ifconfig ral0 down` and hooked > the laptops up to a switch. rock-solid for weeks. brought ral0 back up > and within days of usage the box hung again. i see at least one person > w/similar symptoms from 2011[1] but nothing more recent. It is possible that your power supply is having issues. With my net5501 I was seeing hard lockups until I upgraded to a stronger power supply (same voltage, higher amperage). The default power supply couldn't power the board, a hard disk, and a wireless Mini PCI card (also a ral rt2661 in my case). You seem to be using a hard disk instead of a CF card, correct? If you like, I can look up the exact specs of my power supply tonight. 
Diff to back out the 'tx interrupt race' fix: Index: rt2661.c =================================================================== RCS file: /cvs/src/sys/dev/ic/rt2661.c,v retrieving revision 1.68 retrieving revision 1.67 diff -u -p -r1.68 -r1.67 --- rt2661.c 23 Aug 2012 10:34:25 -0000 1.68 +++ rt2661.c 17 Jul 2012 14:43:12 -0000 1.67 @@ -34,7 +34,6 @@ #include <sys/timeout.h> #include <sys/conf.h> #include <sys/device.h> -#include <sys/queue.h> #include <machine/bus.h> #include <machine/endian.h> @@ -58,7 +57,6 @@ #include <net80211/ieee80211_var.h> #include <net80211/ieee80211_amrr.h> #include <net80211/ieee80211_radiotap.h> -#include <net80211/ieee80211_node.h> #include <dev/ic/rt2661var.h> #include <dev/ic/rt2661reg.h> @@ -90,8 +88,6 @@ void rt2661_reset_rx_ring(struct rt2661 void rt2661_free_rx_ring(struct rt2661_softc *, struct rt2661_rx_ring *); struct ieee80211_node *rt2661_node_alloc(struct ieee80211com *); -void rt2661_node_free(struct ieee80211com *, - struct ieee80211_node *); int rt2661_media_change(struct ifnet *); void rt2661_next_scan(void *); void rt2661_iter_func(void *, struct ieee80211_node *); @@ -119,7 +115,7 @@ uint16_t rt2661_txtime(int, int, uint32_ uint8_t rt2661_plcp_signal(int); void rt2661_setup_tx_desc(struct rt2661_softc *, struct rt2661_tx_desc *, uint32_t, uint16_t, int, int, - const bus_dma_segment_t *, int, int, u_int8_t); + const bus_dma_segment_t *, int, int); int rt2661_tx_mgt(struct rt2661_softc *, struct mbuf *, struct ieee80211_node *); int rt2661_tx_data(struct rt2661_softc *, struct mbuf *, @@ -160,14 +156,6 @@ int rt2661_prepare_beacon(struct rt2661 #endif void rt2661_enable_tsf_sync(struct rt2661_softc *); int rt2661_get_rssi(struct rt2661_softc *, uint8_t); -struct rt2661_amrr_node *rt2661_amrr_node_alloc(struct ieee80211com *, - struct rt2661_node *); -void rt2661_amrr_node_free(struct rt2661_softc *, - struct rt2661_amrr_node *); -void rt2661_amrr_node_free_all(struct rt2661_softc *); -void 
rt2661_amrr_node_free_unused(struct rt2661_softc *); -struct rt2661_amrr_node *rt2661_amrr_node_find(struct rt2661_softc *, - u_int8_t); static const struct { uint32_t reg; @@ -207,8 +195,6 @@ rt2661_attach(void *xsc, int id) timeout_set(&sc->amrr_to, rt2661_updatestats, sc); timeout_set(&sc->scan_to, rt2661_next_scan, sc); - TAILQ_INIT(&sc->amn); - /* wait for NIC to initialize */ for (ntries = 0; ntries < 1000; ntries++) { if ((val = RAL_READ(sc, RT2661_MAC_CSR0)) != 0) @@ -358,8 +344,6 @@ rt2661_attachhook(void *xsc) if_attach(ifp); ieee80211_ifattach(ifp); ic->ic_node_alloc = rt2661_node_alloc; - sc->sc_node_free = ic->ic_node_free; - ic->ic_node_free = rt2661_node_free; ic->ic_newassoc = rt2661_newassoc; ic->ic_updateslot = rt2661_updateslot; @@ -393,7 +377,6 @@ rt2661_detach(void *xsc) timeout_del(&sc->amrr_to); ieee80211_ifdetach(ifp); /* free all nodes */ - rt2661_amrr_node_free_all(sc); if_detach(ifp); for (ac = 0; ac < 4; ac++) @@ -722,117 +705,11 @@ rt2661_free_rx_ring(struct rt2661_softc } } -struct rt2661_amrr_node * -rt2661_amrr_node_alloc(struct ieee80211com *ic, struct rt2661_node *rn) -{ - struct rt2661_softc *sc = ic->ic_softc; - struct rt2661_amrr_node *amn; - int s; - - if (sc->amn_count >= RT2661_AMRR_NODES_MAX) - rt2661_amrr_node_free_unused(sc); - if (sc->amn_count >= RT2661_AMRR_NODES_MAX) - return NULL; - - amn = malloc(sizeof (struct rt2661_amrr_node), M_DEVBUF, - M_NOWAIT | M_ZERO); - - if (amn) { - s = splnet(); - amn->id = sc->amn_count++; - amn->rn = rn; - TAILQ_INSERT_TAIL(&sc->amn, amn, entry); - splx(s); - } - - return amn; -} - -void -rt2661_amrr_node_free(struct rt2661_softc *sc, struct rt2661_amrr_node *amn) -{ - int s; - - s = splnet(); - if (amn->rn) - amn->rn->amn = NULL; - TAILQ_REMOVE(&sc->amn, amn, entry); - sc->amn_count--; - splx(s); - free(amn, M_DEVBUF); -} - -void -rt2661_amrr_node_free_all(struct rt2661_softc *sc) -{ - struct rt2661_amrr_node *amn, *a; - int s; - - s = splnet(); - TAILQ_FOREACH_SAFE(amn, &sc->amn, 
entry, a) - rt2661_amrr_node_free(sc, amn); - splx(s); -} - -void -rt2661_amrr_node_free_unused(struct rt2661_softc *sc) -{ - struct rt2661_amrr_node *amn, *a; - int s; - - s = splnet(); - TAILQ_FOREACH_SAFE(amn, &sc->amn, entry, a) { - if (amn->rn == NULL) - rt2661_amrr_node_free(sc, amn); - } - splx(s); -} - -struct rt2661_amrr_node * -rt2661_amrr_node_find(struct rt2661_softc *sc, u_int8_t id) -{ - struct rt2661_amrr_node *amn, *a, *ret = NULL; - int s; - - if (id == RT2661_AMRR_INVALID_ID) - return NULL; - - s = splnet(); - TAILQ_FOREACH_SAFE(amn, &sc->amn, entry, a) { - /* If the corresponding node was freed, free the amrr node. */ - if (amn->rn == NULL) - rt2661_amrr_node_free(sc, amn); - else if (amn->id == id) - ret = amn; - } - splx(s); - - return ret; -} - struct ieee80211_node * rt2661_node_alloc(struct ieee80211com *ic) { - struct rt2661_node *rn; - - rn = malloc(sizeof (struct rt2661_node), M_DEVBUF, + return malloc(sizeof (struct rt2661_node), M_DEVBUF, M_NOWAIT | M_ZERO); - if (rn == NULL) - return NULL; - - rn->amn = rt2661_amrr_node_alloc(ic, rn); - return (struct ieee80211_node *)rn; -} - -void -rt2661_node_free(struct ieee80211com *ic, struct ieee80211_node *ni) -{ - struct rt2661_softc *sc = ic->ic_softc; - struct rt2661_node *rn = (struct rt2661_node *)ni; - - if (rn->amn) - rn->amn->rn = NULL; - sc->sc_node_free(ic, ni); } int @@ -877,8 +754,7 @@ rt2661_iter_func(void *arg, struct ieee8 struct rt2661_softc *sc = arg; struct rt2661_node *rn = (struct rt2661_node *)ni; - if (rn->amn) - ieee80211_amrr_choose(&sc->amrr, ni, &rn->amn->amn); + ieee80211_amrr_choose(&sc->amrr, ni, &rn->amn); } /* @@ -898,11 +774,9 @@ rt2661_updatestats(void *arg) else ieee80211_iterate_nodes(ic, rt2661_iter_func, arg); - /* update rx sensitivity and free unused amrr nodes every 1 sec */ - if (++sc->ncalls & 1) { + /* update rx sensitivity every 1 sec */ + if (++sc->ncalls & 1) rt2661_rx_tune(sc); - rt2661_amrr_node_free_unused(sc); - } splx(s); 
timeout_add_msec(&sc->amrr_to, 500); @@ -912,11 +786,9 @@ void rt2661_newassoc(struct ieee80211com *ic, struct ieee80211_node *ni, int isnew) { struct rt2661_softc *sc = ic->ic_softc; - struct rt2661_node *rn = (struct rt2661_node *)ni; int i; - if (rn->amn) - ieee80211_amrr_node_init(&sc->amrr, &rn->amn->amn); + ieee80211_amrr_node_init(&sc->amrr, &((struct rt2661_node *)ni)->amn); /* set rate to some reasonable initial value */ for (i = ni->ni_rates.rs_nrates - 1; @@ -1050,25 +922,32 @@ rt2661_eeprom_read(struct rt2661_softc * return val; } -/* The TX interrupt handler accumulates statistics based on whether frames - * were sent successfully by the ASIC. */ void rt2661_tx_intr(struct rt2661_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; - struct rt2661_amrr_node *amn; - int retrycnt; - u_int8_t amrr_id; + struct rt2661_tx_ring *txq; + struct rt2661_tx_data *data; + struct rt2661_node *rn; + int qid, retrycnt; for (;;) { const uint32_t val = RAL_READ(sc, RT2661_STA_CSR4); if (!(val & RT2661_TX_STAT_VALID)) break; + /* retrieve the queue in which this frame was sent */ + qid = RT2661_TX_QID(val); + txq = (qid <= 3) ? 
&sc->txq[qid] : &sc->mgtq; + /* retrieve rate control algorithm context */ - amrr_id = RT2661_TX_PRIV_DATA(val); - amn = rt2661_amrr_node_find(sc, amrr_id); + data = &txq->data[txq->stat]; + rn = (struct rt2661_node *)data->ni; + + /* if no frame has been sent, ignore */ + if (rn == NULL) + continue; switch (RT2661_TX_RESULT(val)) { case RT2661_TX_SUCCESS: @@ -1076,21 +955,17 @@ rt2661_tx_intr(struct rt2661_softc *sc) DPRINTFN(10, ("data frame sent successfully after " "%d retries\n", retrycnt)); - if (amn) { - amn->amn.amn_txcnt++; - if (retrycnt > 0) - amn->amn.amn_retrycnt++; - } + rn->amn.amn_txcnt++; + if (retrycnt > 0) + rn->amn.amn_retrycnt++; ifp->if_opackets++; break; case RT2661_TX_RETRY_FAIL: DPRINTFN(9, ("sending data frame failed (too much " "retries)\n")); - if (amn) { - amn->amn.amn_txcnt++; - amn->amn.amn_retrycnt++; - } + rn->amn.amn_txcnt++; + rn->amn.amn_retrycnt++; ifp->if_oerrors++; break; @@ -1101,19 +976,24 @@ rt2661_tx_intr(struct rt2661_softc *sc) ifp->if_oerrors++; } - DPRINTFN(15, ("tx done amrr_id=%hhu amn=0x%x\n", amrr_id, amn)); + ieee80211_release_node(ic, data->ni); + data->ni = NULL; + + DPRINTFN(15, ("tx done q=%d idx=%u\n", qid, txq->stat)); + + txq->queued--; + if (++txq->stat >= txq->count) /* faster than % count */ + txq->stat = 0; } + + sc->sc_tx_timer = 0; + ifp->if_flags &= ~IFF_OACTIVE; + rt2661_start(ifp); } -/* The TX DMA interrupt handler processes frames which have been offloaded - * to the ASIC for transmission. We can free all resources corresponding - * to the frame here. 
*/ void rt2661_tx_dma_intr(struct rt2661_softc *sc, struct rt2661_tx_ring *txq) { - struct ieee80211com *ic = &sc->sc_ic; - struct ifnet *ifp = &ic->ic_if; - for (;;) { struct rt2661_tx_desc *desc = &txq->desc[txq->next]; struct rt2661_tx_data *data = &txq->data[txq->next]; @@ -1138,28 +1018,13 @@ rt2661_tx_dma_intr(struct rt2661_softc * bus_dmamap_unload(sc->sc_dmat, data->map); m_freem(data->m); data->m = NULL; - ieee80211_release_node(ic, data->ni); - data->ni = NULL; + /* node reference is released in rt2661_tx_intr() */ DPRINTFN(15, ("tx dma done q=%p idx=%u\n", txq, txq->next)); - txq->queued--; if (++txq->next >= txq->count) /* faster than % count */ txq->next = 0; } - - if (sc->mgtq.queued == 0 && sc->txq[0].queued == 0) - sc->sc_tx_timer = 0; - if (sc->mgtq.queued < RT2661_MGT_RING_COUNT && - sc->txq[0].queued < RT2661_TX_RING_COUNT - 1) { - if (sc->mgtq.queued < RT2661_MGT_RING_COUNT) - sc->sc_flags &= ~RT2661_MGT_OACTIVE; - if (sc->txq[0].queued < RT2661_TX_RING_COUNT - 1) - sc->sc_flags &= ~RT2661_DATA_OACTIVE; - if (!(sc->sc_flags & (RT2661_MGT_OACTIVE|RT2661_DATA_OACTIVE))) - ifp->if_flags &= ~IFF_OACTIVE; - rt2661_start(ifp); - } } void @@ -1556,7 +1421,7 @@ rt2661_plcp_signal(int rate) void rt2661_setup_tx_desc(struct rt2661_softc *sc, struct rt2661_tx_desc *desc, uint32_t flags, uint16_t xflags, int len, int rate, - const bus_dma_segment_t *segs, int nsegs, int ac, u_int8_t amrr_id) + const bus_dma_segment_t *segs, int nsegs, int ac) { struct ieee80211com *ic = &sc->sc_ic; uint16_t plcp_length; @@ -1576,11 +1441,11 @@ rt2661_setup_tx_desc(struct rt2661_softc RT2661_LOGCWMAX(10)); /* - * Remember the ID of the AMRR node to update when Tx completes. - * This field is driver private data only. It will be made available - * by the NIC in STA_CSR4 on Tx interrupts. + * Remember in which queue this frame was sent. This field is driver + * private data only. It will be made available by the NIC in STA_CSR4 + * on Tx interrupts. 
*/ - desc->priv_data = amrr_id; + desc->qid = ac; /* setup PLCP fields */ desc->plcp_signal = rt2661_plcp_signal(rate); @@ -1684,7 +1549,7 @@ rt2661_tx_mgt(struct rt2661_softc *sc, s rt2661_setup_tx_desc(sc, desc, flags, 0 /* XXX HWSEQ */, m0->m_pkthdr.len, rate, data->map->dm_segs, data->map->dm_nsegs, - RT2661_QID_MGT, RT2661_AMRR_INVALID_ID); + RT2661_QID_MGT); bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_PREWRITE); @@ -1709,7 +1574,6 @@ rt2661_tx_data(struct rt2661_softc *sc, { struct ieee80211com *ic = &sc->sc_ic; struct rt2661_tx_ring *txq = &sc->txq[ac]; - struct rt2661_node *rn; struct rt2661_tx_desc *desc; struct rt2661_tx_data *data; struct ieee80211_frame *wh; @@ -1719,7 +1583,6 @@ rt2661_tx_data(struct rt2661_softc *sc, uint32_t flags = 0; int pktlen, rate, needcts = 0, needrts = 0, error; - rn = ((ni == ic->ic_bss) ? NULL : (struct rt2661_node *)ni); wh = mtod(m0, struct ieee80211_frame *); if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { @@ -1814,8 +1677,7 @@ rt2661_tx_data(struct rt2661_softc *sc, rt2661_setup_tx_desc(sc, desc, (needrts ? RT2661_TX_NEED_ACK : 0) | RT2661_TX_MORE_FRAG, 0, mprot->m_pkthdr.len, protrate, data->map->dm_segs, - data->map->dm_nsegs, ac, - (rn && rn->amn) ? rn->amn->id : RT2661_AMRR_INVALID_ID); + data->map->dm_nsegs, ac); bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_PREWRITE); @@ -1905,8 +1767,7 @@ rt2661_tx_data(struct rt2661_softc *sc, } rt2661_setup_tx_desc(sc, desc, flags, 0, m0->m_pkthdr.len, rate, - data->map->dm_segs, data->map->dm_nsegs, ac, - (rn && rn->amn) ? 
rn->amn->id : RT2661_AMRR_INVALID_ID); + data->map->dm_segs, data->map->dm_nsegs, ac); bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize, BUS_DMASYNC_PREWRITE); @@ -2745,7 +2606,6 @@ rt2661_stop(struct ifnet *ifp, int disab ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE); ieee80211_new_state(ic, IEEE80211_S_INIT, -1); /* free all nodes */ - rt2661_amrr_node_free_all(sc); /* abort Tx (for all 5 Tx rings) */ RAL_WRITE(sc, RT2661_TX_CNTL_CSR, 0x1f << 16); @@ -2956,8 +2816,7 @@ rt2661_prepare_beacon(struct rt2661_soft rate = IEEE80211_IS_CHAN_5GHZ(ni->ni_chan) ? 12 : 2; rt2661_setup_tx_desc(sc, &desc, RT2661_TX_TIMESTAMP, RT2661_TX_HWSEQ, - m0->m_pkthdr.len, rate, NULL, 0, RT2661_QID_MGT, - RT2661_AMRR_INVALID_ID); + m0->m_pkthdr.len, rate, NULL, 0, RT2661_QID_MGT); /* copy the first 24 bytes of Tx descriptor into NIC memory */ RAL_WRITE_REGION_1(sc, RT2661_HW_BEACON_BASE0, (uint8_t *)&desc, 24); Index: rt2661reg.h =================================================================== RCS file: /cvs/src/sys/dev/ic/rt2661reg.h,v retrieving revision 1.12 retrieving revision 1.11 diff -u -p -r1.12 -r1.11 --- rt2661reg.h 23 Aug 2012 10:34:25 -0000 1.12 +++ rt2661reg.h 14 Feb 2010 09:20:34 -0000 1.11 @@ -189,9 +189,7 @@ #define RT2661_TX_STAT_VALID (1 << 0) #define RT2661_TX_RESULT(v) (((v) >> 1) & 0x7) #define RT2661_TX_RETRYCNT(v) (((v) >> 4) & 0xf) -/* Driver-private data written before TX and read back when TX completes. - * We store the driver-private ID of an AMRR node in here. 
*/ -#define RT2661_TX_PRIV_DATA(v) (((v) >> 8) & 0xff) +#define RT2661_TX_QID(v) (((v) >> 8) & 0xf) #define RT2661_TX_SUCCESS 0 #define RT2661_TX_RETRY_FAIL 6 @@ -247,7 +245,7 @@ struct rt2661_tx_desc { uint32_t eiv; uint8_t offset; - uint8_t priv_data; + uint8_t qid; #define RT2661_QID_MGT 13 uint8_t txpower; Index: rt2661var.h =================================================================== RCS file: /cvs/src/sys/dev/ic/rt2661var.h,v retrieving revision 1.17 retrieving revision 1.16 diff -u -p -r1.17 -r1.16 --- rt2661var.h 23 Aug 2012 10:34:25 -0000 1.17 +++ rt2661var.h 17 Jul 2012 14:43:12 -0000 1.16 @@ -82,19 +82,9 @@ struct rt2661_rx_ring { int next; }; -#define RT2661_AMRR_NODES_MAX 100 /* based on IEEE80211_CACHE_SIZE */ -#define RT2661_AMRR_INVALID_ID (RT2661_AMRR_NODES_MAX + 1) - -struct rt2661_amrr_node { - struct ieee80211_amrr_node amn; - struct rt2661_node *rn; - u_int8_t id; - TAILQ_ENTRY(rt2661_amrr_node) entry; -}; - struct rt2661_node { struct ieee80211_node ni; - struct rt2661_amrr_node *amn; + struct ieee80211_amrr_node amn; }; struct rt2661_softc { @@ -121,8 +111,6 @@ struct rt2661_softc { #define RT2661_UPDATE_SLOT (1 << 1) #define RT2661_SET_SLOTTIME (1 << 2) #define RT2661_FWLOADED (1 << 3) -#define RT2661_MGT_OACTIVE (1 << 4) -#define RT2661_DATA_OACTIVE (1 << 5) int sc_tx_timer; @@ -187,10 +175,6 @@ struct rt2661_softc { #define sc_txtap sc_txtapu.th int sc_txtap_len; #endif - void (*sc_node_free)(struct ieee80211com *, - struct ieee80211_node *); - TAILQ_HEAD(, rt2661_amrr_node) amn; - u_int8_t amn_count; }; int rt2661_attach(void *, int);