On Mon, Mar 24, 2014 at 06:35:29PM -0700, andy wrote:
> hello -
>
> i've been using a soekris net5501 as a home gateway since early 2008,
> starting w/openbsd 4.2 and upgrading through 5.4. for most of that time
> it's also been serving as a wireless access point. the wireless card is
> a SparkLAN WMIR-168AG WLAN 802.11a/b/g Mini PCI Module with the Ralink
> RT2561T chipset (ral driver; dmesg.boot attached). the system has been
> working reliably for years.
>
> however, the box started to hang within days of upgrading to 5.4. it
> stays responsive for a variable length of time after reboot, ranging
> from minutes to a week or more (but not much more). and unfortunately,
> it hangs w/o writing anything to syslog or serial console. i enabled
> ddb.console in sysctl.conf but found it to be completely unresponsive
> when hung (i successfully tested sending a break in normal operation).
The diff below backs out the changes I made to ral between 5.3 and 5.4.
Can you test this? I doubt it will have any effect, but if it does
I'd very much like to know about it.
> i've merged the patches from the 5.4 release errata & patch list and
> rebuilt the os to no effect. there was some correlation between the
> hangs and increased wireless usage; i tried disabling pf and squid but
> the hangs continued. eventually i ran `ifconfig ral0 down` and hooked
> the laptops up to a switch. rock-solid for weeks. brought ral0 back up
> and within days of usage the box hung again. i see at least one person
> w/similar symptoms from 2011[1] but nothing more recent.
It is possible that your power supply is having issues.
With my net5501 I was seeing hard lockups until I upgraded to a stronger
power supply (same voltage, higher amperage). The default power supply couldn't
power the board, a hard disk, and a wireless minipci card (also a ral rt2661
in my case). You seem to be using a hard disk instead of a CF card, correct?
If you like, I can look up the exact specs of my power supply tonight.
Diff to back out the 'tx interrupt race' fix:
Index: rt2661.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/rt2661.c,v
retrieving revision 1.68
retrieving revision 1.67
diff -u -p -r1.68 -r1.67
--- rt2661.c 23 Aug 2012 10:34:25 -0000 1.68
+++ rt2661.c 17 Jul 2012 14:43:12 -0000 1.67
@@ -34,7 +34,6 @@
#include <sys/timeout.h>
#include <sys/conf.h>
#include <sys/device.h>
-#include <sys/queue.h>
#include <machine/bus.h>
#include <machine/endian.h>
@@ -58,7 +57,6 @@
#include <net80211/ieee80211_var.h>
#include <net80211/ieee80211_amrr.h>
#include <net80211/ieee80211_radiotap.h>
-#include <net80211/ieee80211_node.h>
#include <dev/ic/rt2661var.h>
#include <dev/ic/rt2661reg.h>
@@ -90,8 +88,6 @@ void rt2661_reset_rx_ring(struct rt2661
void rt2661_free_rx_ring(struct rt2661_softc *,
struct rt2661_rx_ring *);
struct ieee80211_node *rt2661_node_alloc(struct ieee80211com *);
-void rt2661_node_free(struct ieee80211com *,
- struct ieee80211_node *);
int rt2661_media_change(struct ifnet *);
void rt2661_next_scan(void *);
void rt2661_iter_func(void *, struct ieee80211_node *);
@@ -119,7 +115,7 @@ uint16_t rt2661_txtime(int, int, uint32_
uint8_t rt2661_plcp_signal(int);
void rt2661_setup_tx_desc(struct rt2661_softc *,
struct rt2661_tx_desc *, uint32_t, uint16_t, int, int,
- const bus_dma_segment_t *, int, int, u_int8_t);
+ const bus_dma_segment_t *, int, int);
int rt2661_tx_mgt(struct rt2661_softc *, struct mbuf *,
struct ieee80211_node *);
int rt2661_tx_data(struct rt2661_softc *, struct mbuf *,
@@ -160,14 +156,6 @@ int rt2661_prepare_beacon(struct rt2661
#endif
void rt2661_enable_tsf_sync(struct rt2661_softc *);
int rt2661_get_rssi(struct rt2661_softc *, uint8_t);
-struct rt2661_amrr_node *rt2661_amrr_node_alloc(struct ieee80211com *,
- struct rt2661_node *);
-void rt2661_amrr_node_free(struct rt2661_softc *,
- struct rt2661_amrr_node *);
-void rt2661_amrr_node_free_all(struct rt2661_softc *);
-void rt2661_amrr_node_free_unused(struct rt2661_softc *);
-struct rt2661_amrr_node *rt2661_amrr_node_find(struct rt2661_softc *,
- u_int8_t);
static const struct {
uint32_t reg;
@@ -207,8 +195,6 @@ rt2661_attach(void *xsc, int id)
timeout_set(&sc->amrr_to, rt2661_updatestats, sc);
timeout_set(&sc->scan_to, rt2661_next_scan, sc);
- TAILQ_INIT(&sc->amn);
-
/* wait for NIC to initialize */
for (ntries = 0; ntries < 1000; ntries++) {
if ((val = RAL_READ(sc, RT2661_MAC_CSR0)) != 0)
@@ -358,8 +344,6 @@ rt2661_attachhook(void *xsc)
if_attach(ifp);
ieee80211_ifattach(ifp);
ic->ic_node_alloc = rt2661_node_alloc;
- sc->sc_node_free = ic->ic_node_free;
- ic->ic_node_free = rt2661_node_free;
ic->ic_newassoc = rt2661_newassoc;
ic->ic_updateslot = rt2661_updateslot;
@@ -393,7 +377,6 @@ rt2661_detach(void *xsc)
timeout_del(&sc->amrr_to);
ieee80211_ifdetach(ifp); /* free all nodes */
- rt2661_amrr_node_free_all(sc);
if_detach(ifp);
for (ac = 0; ac < 4; ac++)
@@ -722,117 +705,11 @@ rt2661_free_rx_ring(struct rt2661_softc
}
}
-struct rt2661_amrr_node *
-rt2661_amrr_node_alloc(struct ieee80211com *ic, struct rt2661_node *rn)
-{
- struct rt2661_softc *sc = ic->ic_softc;
- struct rt2661_amrr_node *amn;
- int s;
-
- if (sc->amn_count >= RT2661_AMRR_NODES_MAX)
- rt2661_amrr_node_free_unused(sc);
- if (sc->amn_count >= RT2661_AMRR_NODES_MAX)
- return NULL;
-
- amn = malloc(sizeof (struct rt2661_amrr_node), M_DEVBUF,
- M_NOWAIT | M_ZERO);
-
- if (amn) {
- s = splnet();
- amn->id = sc->amn_count++;
- amn->rn = rn;
- TAILQ_INSERT_TAIL(&sc->amn, amn, entry);
- splx(s);
- }
-
- return amn;
-}
-
-void
-rt2661_amrr_node_free(struct rt2661_softc *sc, struct rt2661_amrr_node *amn)
-{
- int s;
-
- s = splnet();
- if (amn->rn)
- amn->rn->amn = NULL;
- TAILQ_REMOVE(&sc->amn, amn, entry);
- sc->amn_count--;
- splx(s);
- free(amn, M_DEVBUF);
-}
-
-void
-rt2661_amrr_node_free_all(struct rt2661_softc *sc)
-{
- struct rt2661_amrr_node *amn, *a;
- int s;
-
- s = splnet();
- TAILQ_FOREACH_SAFE(amn, &sc->amn, entry, a)
- rt2661_amrr_node_free(sc, amn);
- splx(s);
-}
-
-void
-rt2661_amrr_node_free_unused(struct rt2661_softc *sc)
-{
- struct rt2661_amrr_node *amn, *a;
- int s;
-
- s = splnet();
- TAILQ_FOREACH_SAFE(amn, &sc->amn, entry, a) {
- if (amn->rn == NULL)
- rt2661_amrr_node_free(sc, amn);
- }
- splx(s);
-}
-
-struct rt2661_amrr_node *
-rt2661_amrr_node_find(struct rt2661_softc *sc, u_int8_t id)
-{
- struct rt2661_amrr_node *amn, *a, *ret = NULL;
- int s;
-
- if (id == RT2661_AMRR_INVALID_ID)
- return NULL;
-
- s = splnet();
- TAILQ_FOREACH_SAFE(amn, &sc->amn, entry, a) {
- /* If the corresponding node was freed, free the amrr node. */
- if (amn->rn == NULL)
- rt2661_amrr_node_free(sc, amn);
- else if (amn->id == id)
- ret = amn;
- }
- splx(s);
-
- return ret;
-}
-
struct ieee80211_node *
rt2661_node_alloc(struct ieee80211com *ic)
{
- struct rt2661_node *rn;
-
- rn = malloc(sizeof (struct rt2661_node), M_DEVBUF,
+ return malloc(sizeof (struct rt2661_node), M_DEVBUF,
M_NOWAIT | M_ZERO);
- if (rn == NULL)
- return NULL;
-
- rn->amn = rt2661_amrr_node_alloc(ic, rn);
- return (struct ieee80211_node *)rn;
-}
-
-void
-rt2661_node_free(struct ieee80211com *ic, struct ieee80211_node *ni)
-{
- struct rt2661_softc *sc = ic->ic_softc;
- struct rt2661_node *rn = (struct rt2661_node *)ni;
-
- if (rn->amn)
- rn->amn->rn = NULL;
- sc->sc_node_free(ic, ni);
}
int
@@ -877,8 +754,7 @@ rt2661_iter_func(void *arg, struct ieee8
struct rt2661_softc *sc = arg;
struct rt2661_node *rn = (struct rt2661_node *)ni;
- if (rn->amn)
- ieee80211_amrr_choose(&sc->amrr, ni, &rn->amn->amn);
+ ieee80211_amrr_choose(&sc->amrr, ni, &rn->amn);
}
/*
@@ -898,11 +774,9 @@ rt2661_updatestats(void *arg)
else
ieee80211_iterate_nodes(ic, rt2661_iter_func, arg);
- /* update rx sensitivity and free unused amrr nodes every 1 sec */
- if (++sc->ncalls & 1) {
+ /* update rx sensitivity every 1 sec */
+ if (++sc->ncalls & 1)
rt2661_rx_tune(sc);
- rt2661_amrr_node_free_unused(sc);
- }
splx(s);
timeout_add_msec(&sc->amrr_to, 500);
@@ -912,11 +786,9 @@ void
rt2661_newassoc(struct ieee80211com *ic, struct ieee80211_node *ni, int isnew)
{
struct rt2661_softc *sc = ic->ic_softc;
- struct rt2661_node *rn = (struct rt2661_node *)ni;
int i;
- if (rn->amn)
- ieee80211_amrr_node_init(&sc->amrr, &rn->amn->amn);
+ ieee80211_amrr_node_init(&sc->amrr, &((struct rt2661_node *)ni)->amn);
/* set rate to some reasonable initial value */
for (i = ni->ni_rates.rs_nrates - 1;
@@ -1050,25 +922,32 @@ rt2661_eeprom_read(struct rt2661_softc *
return val;
}
-/* The TX interrupt handler accumulates statistics based on whether frames
- * were sent successfully by the ASIC. */
void
rt2661_tx_intr(struct rt2661_softc *sc)
{
struct ieee80211com *ic = &sc->sc_ic;
struct ifnet *ifp = &ic->ic_if;
- struct rt2661_amrr_node *amn;
- int retrycnt;
- u_int8_t amrr_id;
+ struct rt2661_tx_ring *txq;
+ struct rt2661_tx_data *data;
+ struct rt2661_node *rn;
+ int qid, retrycnt;
for (;;) {
const uint32_t val = RAL_READ(sc, RT2661_STA_CSR4);
if (!(val & RT2661_TX_STAT_VALID))
break;
+ /* retrieve the queue in which this frame was sent */
+ qid = RT2661_TX_QID(val);
+ txq = (qid <= 3) ? &sc->txq[qid] : &sc->mgtq;
+
/* retrieve rate control algorithm context */
- amrr_id = RT2661_TX_PRIV_DATA(val);
- amn = rt2661_amrr_node_find(sc, amrr_id);
+ data = &txq->data[txq->stat];
+ rn = (struct rt2661_node *)data->ni;
+
+ /* if no frame has been sent, ignore */
+ if (rn == NULL)
+ continue;
switch (RT2661_TX_RESULT(val)) {
case RT2661_TX_SUCCESS:
@@ -1076,21 +955,17 @@ rt2661_tx_intr(struct rt2661_softc *sc)
DPRINTFN(10, ("data frame sent successfully after "
"%d retries\n", retrycnt));
- if (amn) {
- amn->amn.amn_txcnt++;
- if (retrycnt > 0)
- amn->amn.amn_retrycnt++;
- }
+ rn->amn.amn_txcnt++;
+ if (retrycnt > 0)
+ rn->amn.amn_retrycnt++;
ifp->if_opackets++;
break;
case RT2661_TX_RETRY_FAIL:
DPRINTFN(9, ("sending data frame failed (too much "
"retries)\n"));
- if (amn) {
- amn->amn.amn_txcnt++;
- amn->amn.amn_retrycnt++;
- }
+ rn->amn.amn_txcnt++;
+ rn->amn.amn_retrycnt++;
ifp->if_oerrors++;
break;
@@ -1101,19 +976,24 @@ rt2661_tx_intr(struct rt2661_softc *sc)
ifp->if_oerrors++;
}
- DPRINTFN(15, ("tx done amrr_id=%hhu amn=0x%x\n", amrr_id, amn));
+ ieee80211_release_node(ic, data->ni);
+ data->ni = NULL;
+
+ DPRINTFN(15, ("tx done q=%d idx=%u\n", qid, txq->stat));
+
+ txq->queued--;
+ if (++txq->stat >= txq->count) /* faster than % count */
+ txq->stat = 0;
}
+
+ sc->sc_tx_timer = 0;
+ ifp->if_flags &= ~IFF_OACTIVE;
+ rt2661_start(ifp);
}
-/* The TX DMA interrupt handler processes frames which have been offloaded
- * to the ASIC for transmission. We can free all resources corresponding
- * to the frame here. */
void
rt2661_tx_dma_intr(struct rt2661_softc *sc, struct rt2661_tx_ring *txq)
{
- struct ieee80211com *ic = &sc->sc_ic;
- struct ifnet *ifp = &ic->ic_if;
-
for (;;) {
struct rt2661_tx_desc *desc = &txq->desc[txq->next];
struct rt2661_tx_data *data = &txq->data[txq->next];
@@ -1138,28 +1018,13 @@ rt2661_tx_dma_intr(struct rt2661_softc *
bus_dmamap_unload(sc->sc_dmat, data->map);
m_freem(data->m);
data->m = NULL;
- ieee80211_release_node(ic, data->ni);
- data->ni = NULL;
+ /* node reference is released in rt2661_tx_intr() */
DPRINTFN(15, ("tx dma done q=%p idx=%u\n", txq, txq->next));
- txq->queued--;
if (++txq->next >= txq->count) /* faster than % count */
txq->next = 0;
}
-
- if (sc->mgtq.queued == 0 && sc->txq[0].queued == 0)
- sc->sc_tx_timer = 0;
- if (sc->mgtq.queued < RT2661_MGT_RING_COUNT &&
- sc->txq[0].queued < RT2661_TX_RING_COUNT - 1) {
- if (sc->mgtq.queued < RT2661_MGT_RING_COUNT)
- sc->sc_flags &= ~RT2661_MGT_OACTIVE;
- if (sc->txq[0].queued < RT2661_TX_RING_COUNT - 1)
- sc->sc_flags &= ~RT2661_DATA_OACTIVE;
- if (!(sc->sc_flags & (RT2661_MGT_OACTIVE|RT2661_DATA_OACTIVE)))
- ifp->if_flags &= ~IFF_OACTIVE;
- rt2661_start(ifp);
- }
}
void
@@ -1556,7 +1421,7 @@ rt2661_plcp_signal(int rate)
void
rt2661_setup_tx_desc(struct rt2661_softc *sc, struct rt2661_tx_desc *desc,
uint32_t flags, uint16_t xflags, int len, int rate,
- const bus_dma_segment_t *segs, int nsegs, int ac, u_int8_t amrr_id)
+ const bus_dma_segment_t *segs, int nsegs, int ac)
{
struct ieee80211com *ic = &sc->sc_ic;
uint16_t plcp_length;
@@ -1576,11 +1441,11 @@ rt2661_setup_tx_desc(struct rt2661_softc
RT2661_LOGCWMAX(10));
/*
- * Remember the ID of the AMRR node to update when Tx completes.
- * This field is driver private data only. It will be made available
- * by the NIC in STA_CSR4 on Tx interrupts.
+ * Remember in which queue this frame was sent. This field is driver
+ * private data only. It will be made available by the NIC in STA_CSR4
+ * on Tx interrupts.
*/
- desc->priv_data = amrr_id;
+ desc->qid = ac;
/* setup PLCP fields */
desc->plcp_signal = rt2661_plcp_signal(rate);
@@ -1684,7 +1549,7 @@ rt2661_tx_mgt(struct rt2661_softc *sc, s
rt2661_setup_tx_desc(sc, desc, flags, 0 /* XXX HWSEQ */,
m0->m_pkthdr.len, rate, data->map->dm_segs, data->map->dm_nsegs,
- RT2661_QID_MGT, RT2661_AMRR_INVALID_ID);
+ RT2661_QID_MGT);
bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize,
BUS_DMASYNC_PREWRITE);
@@ -1709,7 +1574,6 @@ rt2661_tx_data(struct rt2661_softc *sc,
{
struct ieee80211com *ic = &sc->sc_ic;
struct rt2661_tx_ring *txq = &sc->txq[ac];
- struct rt2661_node *rn;
struct rt2661_tx_desc *desc;
struct rt2661_tx_data *data;
struct ieee80211_frame *wh;
@@ -1719,7 +1583,6 @@ rt2661_tx_data(struct rt2661_softc *sc,
uint32_t flags = 0;
int pktlen, rate, needcts = 0, needrts = 0, error;
- rn = ((ni == ic->ic_bss) ? NULL : (struct rt2661_node *)ni);
wh = mtod(m0, struct ieee80211_frame *);
if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
@@ -1814,8 +1677,7 @@ rt2661_tx_data(struct rt2661_softc *sc,
rt2661_setup_tx_desc(sc, desc,
(needrts ? RT2661_TX_NEED_ACK : 0) | RT2661_TX_MORE_FRAG,
0, mprot->m_pkthdr.len, protrate, data->map->dm_segs,
- data->map->dm_nsegs, ac,
- (rn && rn->amn) ? rn->amn->id : RT2661_AMRR_INVALID_ID);
+ data->map->dm_nsegs, ac);
bus_dmamap_sync(sc->sc_dmat, data->map, 0,
data->map->dm_mapsize, BUS_DMASYNC_PREWRITE);
@@ -1905,8 +1767,7 @@ rt2661_tx_data(struct rt2661_softc *sc,
}
rt2661_setup_tx_desc(sc, desc, flags, 0, m0->m_pkthdr.len, rate,
- data->map->dm_segs, data->map->dm_nsegs, ac,
- (rn && rn->amn) ? rn->amn->id : RT2661_AMRR_INVALID_ID);
+ data->map->dm_segs, data->map->dm_nsegs, ac);
bus_dmamap_sync(sc->sc_dmat, data->map, 0, data->map->dm_mapsize,
BUS_DMASYNC_PREWRITE);
@@ -2745,7 +2606,6 @@ rt2661_stop(struct ifnet *ifp, int disab
ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
ieee80211_new_state(ic, IEEE80211_S_INIT, -1); /* free all nodes */
- rt2661_amrr_node_free_all(sc);
/* abort Tx (for all 5 Tx rings) */
RAL_WRITE(sc, RT2661_TX_CNTL_CSR, 0x1f << 16);
@@ -2956,8 +2816,7 @@ rt2661_prepare_beacon(struct rt2661_soft
rate = IEEE80211_IS_CHAN_5GHZ(ni->ni_chan) ? 12 : 2;
rt2661_setup_tx_desc(sc, &desc, RT2661_TX_TIMESTAMP, RT2661_TX_HWSEQ,
- m0->m_pkthdr.len, rate, NULL, 0, RT2661_QID_MGT,
- RT2661_AMRR_INVALID_ID);
+ m0->m_pkthdr.len, rate, NULL, 0, RT2661_QID_MGT);
/* copy the first 24 bytes of Tx descriptor into NIC memory */
RAL_WRITE_REGION_1(sc, RT2661_HW_BEACON_BASE0, (uint8_t *)&desc, 24);
Index: rt2661reg.h
===================================================================
RCS file: /cvs/src/sys/dev/ic/rt2661reg.h,v
retrieving revision 1.12
retrieving revision 1.11
diff -u -p -r1.12 -r1.11
--- rt2661reg.h 23 Aug 2012 10:34:25 -0000 1.12
+++ rt2661reg.h 14 Feb 2010 09:20:34 -0000 1.11
@@ -189,9 +189,7 @@
#define RT2661_TX_STAT_VALID (1 << 0)
#define RT2661_TX_RESULT(v) (((v) >> 1) & 0x7)
#define RT2661_TX_RETRYCNT(v) (((v) >> 4) & 0xf)
-/* Driver-private data written before TX and read back when TX completes.
- * We store the driver-private ID of an AMRR node in here. */
-#define RT2661_TX_PRIV_DATA(v) (((v) >> 8) & 0xff)
+#define RT2661_TX_QID(v) (((v) >> 8) & 0xf)
#define RT2661_TX_SUCCESS 0
#define RT2661_TX_RETRY_FAIL 6
@@ -247,7 +245,7 @@ struct rt2661_tx_desc {
uint32_t eiv;
uint8_t offset;
- uint8_t priv_data;
+ uint8_t qid;
#define RT2661_QID_MGT 13
uint8_t txpower;
Index: rt2661var.h
===================================================================
RCS file: /cvs/src/sys/dev/ic/rt2661var.h,v
retrieving revision 1.17
retrieving revision 1.16
diff -u -p -r1.17 -r1.16
--- rt2661var.h 23 Aug 2012 10:34:25 -0000 1.17
+++ rt2661var.h 17 Jul 2012 14:43:12 -0000 1.16
@@ -82,19 +82,9 @@ struct rt2661_rx_ring {
int next;
};
-#define RT2661_AMRR_NODES_MAX 100 /* based on IEEE80211_CACHE_SIZE */
-#define RT2661_AMRR_INVALID_ID (RT2661_AMRR_NODES_MAX + 1)
-
-struct rt2661_amrr_node {
- struct ieee80211_amrr_node amn;
- struct rt2661_node *rn;
- u_int8_t id;
- TAILQ_ENTRY(rt2661_amrr_node) entry;
-};
-
struct rt2661_node {
struct ieee80211_node ni;
- struct rt2661_amrr_node *amn;
+ struct ieee80211_amrr_node amn;
};
struct rt2661_softc {
@@ -121,8 +111,6 @@ struct rt2661_softc {
#define RT2661_UPDATE_SLOT (1 << 1)
#define RT2661_SET_SLOTTIME (1 << 2)
#define RT2661_FWLOADED (1 << 3)
-#define RT2661_MGT_OACTIVE (1 << 4)
-#define RT2661_DATA_OACTIVE (1 << 5)
int sc_tx_timer;
@@ -187,10 +175,6 @@ struct rt2661_softc {
#define sc_txtap sc_txtapu.th
int sc_txtap_len;
#endif
- void (*sc_node_free)(struct ieee80211com *,
- struct ieee80211_node *);
- TAILQ_HEAD(, rt2661_amrr_node) amn;
- u_int8_t amn_count;
};
int rt2661_attach(void *, int);