Date: Fri, 12 Nov 2010 14:39:41 -0700
From: Theo de Raadt<[email protected]>
commit. someone will eventually fix MCLGETI, since it is in the tree.
The problem is that re(4) has a "forever" loop from which we only
escape if none of the RL_INTRS_CPLUS bits are set in the interrupt
status register. One of those is the "Rx Descriptor Unavailable" bit
(RL_ISR_RX_OVERRUN). So if we run out of populated descriptors and
can't allocate any new ones, we'll spin forever. Obviously chances of
that happening are much larger with MCLGETI, but it may also happen
without if you run out of mbufs.
The diff below fixes the issue for me (first diff is relative to the
code before the backout, second diff includes a revert of the
backout). This makes sure we drop out of the interrupt handler such
that we have a chance to free some mbufs. None of the drivers I'm
familliar with have this loop that repeatedly reads the interrupt
status register. Except for vr(4), which seems to suffer from the
same problem as re(4) with MCLGETI.
I did some tests with tcpbench(1) and it seems throughput is slightly
smaller (compared to non-MCLGETI), but the difference is less than 1%.
I'd appreciate it if the people that experienced the issues can test
this diff (use the 2nd diff if you have a -current source tree).
--- first diff ---
Index: re.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/re.c,v
retrieving revision 1.129
diff -u -p -r1.129 re.c
--- re.c 5 Oct 2010 08:57:34 -0000 1.129
+++ re.c 27 Nov 2010 18:18:40 -0000
@@ -1605,21 +1605,17 @@ re_intr(void *arg)
return (0);
rx = tx = 0;
- for (;;) {
+ status = CSR_READ_2(sc, RL_ISR);
+ /* If the card has gone away the read returns 0xffff. */
+ if (status == 0xffff)
+ return (0);
+ if (status)
+ CSR_WRITE_2(sc, RL_ISR, status);
- status = CSR_READ_2(sc, RL_ISR);
- /* If the card has gone away the read returns 0xffff. */
- if (status == 0xffff)
- break;
- if (status)
- CSR_WRITE_2(sc, RL_ISR, status);
-
- if (status& RL_ISR_TIMEOUT_EXPIRED)
- claimed = 1;
-
- if ((status& RL_INTRS_CPLUS) == 0)
- break;
+ if (status& RL_ISR_TIMEOUT_EXPIRED)
+ claimed = 1;
+ if (status& RL_INTRS_CPLUS) {
if (status& (sc->rl_rx_ack | RL_ISR_RX_ERR)) {
rx |= re_rxeof(sc);
claimed = 1;
--- second diff diff ---
Index: re.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/re.c,v
retrieving revision 1.130
diff -u -p -r1.130 re.c
--- re.c 12 Nov 2010 22:17:30 -0000 1.130
+++ re.c 27 Nov 2010 18:30:22 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: re.c,v 1.130 2010/11/12 22:17:30 sthen Exp $ */
+/* $OpenBSD: re.c,v 1.129 2010/10/05 08:57:34 mikeb Exp $ */
/* $FreeBSD: if_re.c,v 1.31 2004/09/04 07:54:05 ru Exp $ */
/*
* Copyright (c) 1997, 1998-2003
@@ -162,8 +162,9 @@ static inline void re_set_bufaddr(struct
int re_encap(struct rl_softc *, struct mbuf *, int *);
-int re_newbuf(struct rl_softc *, int, struct mbuf *);
+int re_newbuf(struct rl_softc *);
int re_rx_list_init(struct rl_softc *);
+void re_rx_list_fill(struct rl_softc *);
int re_tx_list_init(struct rl_softc *);
int re_rxeof(struct rl_softc *);
int re_txeof(struct rl_softc *);
@@ -1108,6 +1109,8 @@ re_attach(struct rl_softc *sc, const cha
IFQ_SET_MAXLEN(&ifp->if_snd, RL_TX_QLEN);
IFQ_SET_READY(&ifp->if_snd);
+ m_clsetwms(ifp, MCLBYTES, 2, RL_RX_DESC_CNT);
+
ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_CSUM_IPv4 |
IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
@@ -1212,28 +1215,18 @@ fail_0:
int
-re_newbuf(struct rl_softc *sc, int idx, struct mbuf *m)
+re_newbuf(struct rl_softc *sc)
{
- struct mbuf *n = NULL;
+ struct mbuf *m;
bus_dmamap_t map;
struct rl_desc *d;
struct rl_rxsoft *rxs;
u_int32_t cmdstat;
- int error;
+ int error, idx;
- if (m == NULL) {
- MGETHDR(n, M_DONTWAIT, MT_DATA);
- if (n == NULL)
- return (ENOBUFS);
-
- MCLGET(n, M_DONTWAIT);
- if (!(n->m_flags& M_EXT)) {
- m_freem(n);
- return (ENOBUFS);
- }
- m = n;
- } else
- m->m_data = m->m_ext.ext_buf;
+ m = MCLGETI(NULL, M_DONTWAIT,&sc->sc_arpcom.ac_if, MCLBYTES);
+ if (!m)
+ return (ENOBUFS);
/*
* Initialize mbuf length fields and fixup
@@ -1243,13 +1236,15 @@ re_newbuf(struct rl_softc *sc, int idx,
m->m_len = m->m_pkthdr.len = RE_RX_DESC_BUFLEN;
m->m_data += RE_ETHER_ALIGN;
+ idx = sc->rl_ldata.rl_rx_prodidx;
rxs =&sc->rl_ldata.rl_rxsoft[idx];
map = rxs->rxs_dmamap;
error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
BUS_DMA_READ|BUS_DMA_NOWAIT);
-
- if (error)
- goto out;
+ if (error) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
BUS_DMASYNC_PREREAD);
@@ -1261,7 +1256,8 @@ re_newbuf(struct rl_softc *sc, int idx,
if (cmdstat& RL_RDESC_STAT_OWN) {
printf("%s: tried to map busy RX descriptor\n",
sc->sc_dev.dv_xname);
- goto out;
+ m_freem(m);
+ return (ENOBUFS);
}
rxs->rxs_mbuf = m;
@@ -1277,11 +1273,10 @@ re_newbuf(struct rl_softc *sc, int idx,
d->rl_cmdstat = htole32(cmdstat);
RL_RXDESCSYNC(sc, idx, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
+ sc->rl_ldata.rl_rx_prodidx = RL_NEXT_RX_DESC(sc, idx);
+ sc->rl_ldata.rl_rx_cnt++;
+
return (0);
- out:
- if (n != NULL)
- m_freem(n);
- return (ENOMEM);
}
@@ -1310,21 +1305,27 @@ re_tx_list_init(struct rl_softc *sc)
int
re_rx_list_init(struct rl_softc *sc)
{
- int i;
-
- memset((char *)sc->rl_ldata.rl_rx_list, 0, RL_RX_LIST_SZ);
-
- for (i = 0; i< RL_RX_DESC_CNT; i++) {
- if (re_newbuf(sc, i, NULL) == ENOBUFS)
- return (ENOBUFS);
- }
+ bzero(sc->rl_ldata.rl_rx_list, RL_RX_LIST_SZ);
sc->rl_ldata.rl_rx_prodidx = 0;
+ sc->rl_ldata.rl_rx_considx = 0;
+ sc->rl_ldata.rl_rx_cnt = 0;
sc->rl_head = sc->rl_tail = NULL;
+ re_rx_list_fill(sc);
+
return (0);
}
+void
+re_rx_list_fill(struct rl_softc *sc)
+{
+ while (sc->rl_ldata.rl_rx_cnt< RL_RX_DESC_CNT) {
+ if (re_newbuf(sc) == ENOBUFS)
+ break;
+ }
+}
+
/*
* RX handler for C+ and 8169. For the gigE chips, we support
* the reception of jumbo frames that have been fragmented
@@ -1342,7 +1343,8 @@ re_rxeof(struct rl_softc *sc)
ifp =&sc->sc_arpcom.ac_if;
- for (i = sc->rl_ldata.rl_rx_prodidx;; i = RL_NEXT_RX_DESC(sc, i)) {
+ for (i = sc->rl_ldata.rl_rx_considx; sc->rl_ldata.rl_rx_cnt> 0;
+ i = RL_NEXT_RX_DESC(sc, i)) {
cur_rx =&sc->rl_ldata.rl_rx_list[i];
RL_RXDESCSYNC(sc, i,
BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
@@ -1354,6 +1356,8 @@ re_rxeof(struct rl_softc *sc)
total_len = rxstat& sc->rl_rxlenmask;
rxs =&sc->rl_ldata.rl_rxsoft[i];
m = rxs->rxs_mbuf;
+ rxs->rxs_mbuf = NULL;
+ sc->rl_ldata.rl_rx_cnt--;
rx = 1;
/* Invalidate the RX mbuf and unload its map */
@@ -1372,7 +1376,6 @@ re_rxeof(struct rl_softc *sc)
sc->rl_tail->m_next = m;
sc->rl_tail = m;
}
- re_newbuf(sc, i, NULL);
continue;
}
@@ -1410,22 +1413,6 @@ re_rxeof(struct rl_softc *sc)
m_freem(sc->rl_head);
sc->rl_head = sc->rl_tail = NULL;
}
- re_newbuf(sc, i, m);
- continue;
- }
-
- /*
- * If allocating a replacement mbuf fails,
- * reload the current one.
- */
-
- if (re_newbuf(sc, i, NULL)) {
- ifp->if_ierrors++;
- if (sc->rl_head != NULL) {
- m_freem(sc->rl_head);
- sc->rl_head = sc->rl_tail = NULL;
- }
- re_newbuf(sc, i, m);
continue;
}
@@ -1503,7 +1490,8 @@ re_rxeof(struct rl_softc *sc)
ether_input_mbuf(ifp, m);
}
- sc->rl_ldata.rl_rx_prodidx = i;
+ sc->rl_ldata.rl_rx_considx = i;
+ re_rx_list_fill(sc);
return (rx);
}
@@ -1617,21 +1605,17 @@ re_intr(void *arg)
return (0);
rx = tx = 0;
- for (;;) {
-
- status = CSR_READ_2(sc, RL_ISR);
- /* If the card has gone away the read returns 0xffff. */
- if (status == 0xffff)
- break;
- if (status)
- CSR_WRITE_2(sc, RL_ISR, status);
-
- if (status& RL_ISR_TIMEOUT_EXPIRED)
- claimed = 1;
+ status = CSR_READ_2(sc, RL_ISR);
+ /* If the card has gone away the read returns 0xffff. */
+ if (status == 0xffff)
+ return (0);
+ if (status)
+ CSR_WRITE_2(sc, RL_ISR, status);
- if ((status& RL_INTRS_CPLUS) == 0)
- break;
+ if (status& RL_ISR_TIMEOUT_EXPIRED)
+ claimed = 1;
+ if (status& RL_INTRS_CPLUS) {
if (status& (sc->rl_rx_ack | RL_ISR_RX_ERR)) {
rx |= re_rxeof(sc);
claimed = 1;
Index: rtl81x9reg.h
===================================================================
RCS file: /cvs/src/sys/dev/ic/rtl81x9reg.h,v
retrieving revision 1.71
diff -u -p -r1.71 rtl81x9reg.h
--- rtl81x9reg.h 12 Nov 2010 22:17:30 -0000 1.71
+++ rtl81x9reg.h 27 Nov 2010 18:30:23 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: rtl81x9reg.h,v 1.71 2010/11/12 22:17:30 sthen Exp $ */
+/* $OpenBSD: rtl81x9reg.h,v 1.70 2010/09/07 16:21:43 deraadt Exp $ */
/*
* Copyright (c) 1997, 1998
@@ -787,7 +787,9 @@ struct rl_list_data {
struct rl_rxsoft rl_rxsoft[RL_RX_DESC_CNT];
bus_dmamap_t rl_rx_list_map;
struct rl_desc *rl_rx_list;
+ int rl_rx_considx;
int rl_rx_prodidx;
+ int rl_rx_cnt;
bus_dma_segment_t rl_rx_listseg;
int rl_rx_listnseg;
};