Hello,

it looks like everything is working fine with this patch.

best regards
M.K.


W dniu 2010-11-27 20:12, Mark Kettenis pisze:
Date: Fri, 12 Nov 2010 14:39:41 -0700
From: Theo de Raadt<[email protected]>

commit.  someone will eventually fix MCLGETI, since it is in the tree.
The problem is that re(4) has a "forever" loop from which we only
escape if none of the RL_INTRS_CPLUS bits are set in the interrupt
status register.  One of those is the "Rx Descriptor Unavailable" bit
(RL_ISR_RX_OVERRUN).  So if we run out of populated descriptors and
can't allocate any new ones, we'll spin forever.  Obviously chances of
that happening are much larger with MCLGETI, but it may also happen
without if you run out of mbufs.

The diff below fixes the issue for me (first diff is relative to the
code before the backout, second diff includes a revert of the
backout).  This makes sure we drop out of the interrupt handler such
that we have a chance to free some mbufs.  None of the drivers I'm
familliar with have this loop that repeatedly reads the interrupt
status register.  Except for vr(4), which seems to suffer from the
same problem as re(4) with MCLGETI.

I did some tests with tcpbench(1) and it seems throughput is slightly
smaller (compared to non-MCLGETI), but the difference is less than 1%.

I'd appreciate it if the people that experienced the issues can test
this diff (use the 2nd diff if you have a -current source tree).

--- first diff ---

Index: re.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/re.c,v
retrieving revision 1.129
diff -u -p -r1.129 re.c
--- re.c        5 Oct 2010 08:57:34 -0000       1.129
+++ re.c        27 Nov 2010 18:18:40 -0000
@@ -1605,21 +1605,17 @@ re_intr(void *arg)
                return (0);

        rx = tx = 0;
-       for (;;) {
+       status = CSR_READ_2(sc, RL_ISR);
+       /* If the card has gone away the read returns 0xffff. */
+       if (status == 0xffff)
+               return (0);
+       if (status)
+               CSR_WRITE_2(sc, RL_ISR, status);

-               status = CSR_READ_2(sc, RL_ISR);
-               /* If the card has gone away the read returns 0xffff. */
-               if (status == 0xffff)
-                       break;
-               if (status)
-                       CSR_WRITE_2(sc, RL_ISR, status);
-
-               if (status&  RL_ISR_TIMEOUT_EXPIRED)
-                       claimed = 1;
-
-               if ((status&  RL_INTRS_CPLUS) == 0)
-                       break;
+       if (status&  RL_ISR_TIMEOUT_EXPIRED)
+               claimed = 1;

+       if (status&  RL_INTRS_CPLUS) {
                if (status&  (sc->rl_rx_ack | RL_ISR_RX_ERR)) {
                        rx |= re_rxeof(sc);
                        claimed = 1;


--- second diff diff ---

Index: re.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/re.c,v
retrieving revision 1.130
diff -u -p -r1.130 re.c
--- re.c        12 Nov 2010 22:17:30 -0000      1.130
+++ re.c        27 Nov 2010 18:30:22 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: re.c,v 1.130 2010/11/12 22:17:30 sthen Exp $  */
+/*     $OpenBSD: re.c,v 1.129 2010/10/05 08:57:34 mikeb Exp $  */
  /*    $FreeBSD: if_re.c,v 1.31 2004/09/04 07:54:05 ru Exp $   */
  /*
   * Copyright (c) 1997, 1998-2003
@@ -162,8 +162,9 @@ static inline void re_set_bufaddr(struct

  int   re_encap(struct rl_softc *, struct mbuf *, int *);

-int    re_newbuf(struct rl_softc *, int, struct mbuf *);
+int    re_newbuf(struct rl_softc *);
  int   re_rx_list_init(struct rl_softc *);
+void   re_rx_list_fill(struct rl_softc *);
  int   re_tx_list_init(struct rl_softc *);
  int   re_rxeof(struct rl_softc *);
  int   re_txeof(struct rl_softc *);
@@ -1108,6 +1109,8 @@ re_attach(struct rl_softc *sc, const cha
        IFQ_SET_MAXLEN(&ifp->if_snd, RL_TX_QLEN);
        IFQ_SET_READY(&ifp->if_snd);

+       m_clsetwms(ifp, MCLBYTES, 2, RL_RX_DESC_CNT);
+
        ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_CSUM_IPv4 |
                               IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;

@@ -1212,28 +1215,18 @@ fail_0:


  int
-re_newbuf(struct rl_softc *sc, int idx, struct mbuf *m)
+re_newbuf(struct rl_softc *sc)
  {
-       struct mbuf     *n = NULL;
+       struct mbuf     *m;
        bus_dmamap_t    map;
        struct rl_desc  *d;
        struct rl_rxsoft *rxs;
        u_int32_t       cmdstat;
-       int             error;
+       int             error, idx;

-       if (m == NULL) {
-               MGETHDR(n, M_DONTWAIT, MT_DATA);
-               if (n == NULL)
-                       return (ENOBUFS);
-
-               MCLGET(n, M_DONTWAIT);
-               if (!(n->m_flags&  M_EXT)) {
-                       m_freem(n);
-                       return (ENOBUFS);
-               }
-               m = n;
-       } else
-               m->m_data = m->m_ext.ext_buf;
+       m = MCLGETI(NULL, M_DONTWAIT,&sc->sc_arpcom.ac_if, MCLBYTES);
+       if (!m)
+               return (ENOBUFS);

        /*
         * Initialize mbuf length fields and fixup
@@ -1243,13 +1236,15 @@ re_newbuf(struct rl_softc *sc, int idx,
        m->m_len = m->m_pkthdr.len = RE_RX_DESC_BUFLEN;
        m->m_data += RE_ETHER_ALIGN;

+       idx = sc->rl_ldata.rl_rx_prodidx;
        rxs =&sc->rl_ldata.rl_rxsoft[idx];
        map = rxs->rxs_dmamap;
        error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
            BUS_DMA_READ|BUS_DMA_NOWAIT);
-
-       if (error)
-               goto out;
+       if (error) {
+               m_freem(m);
+               return (ENOBUFS);
+       }

        bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
            BUS_DMASYNC_PREREAD);
@@ -1261,7 +1256,8 @@ re_newbuf(struct rl_softc *sc, int idx,
        if (cmdstat&  RL_RDESC_STAT_OWN) {
                printf("%s: tried to map busy RX descriptor\n",
                    sc->sc_dev.dv_xname);
-               goto out;
+               m_freem(m);
+               return (ENOBUFS);
        }

        rxs->rxs_mbuf = m;
@@ -1277,11 +1273,10 @@ re_newbuf(struct rl_softc *sc, int idx,
        d->rl_cmdstat = htole32(cmdstat);
        RL_RXDESCSYNC(sc, idx, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);

+       sc->rl_ldata.rl_rx_prodidx = RL_NEXT_RX_DESC(sc, idx);
+       sc->rl_ldata.rl_rx_cnt++;
+
        return (0);
- out:
-       if (n != NULL)
-               m_freem(n);
-       return (ENOMEM);
  }


@@ -1310,21 +1305,27 @@ re_tx_list_init(struct rl_softc *sc)
  int
  re_rx_list_init(struct rl_softc *sc)
  {
-       int     i;
-
-       memset((char *)sc->rl_ldata.rl_rx_list, 0, RL_RX_LIST_SZ);
-
-       for (i = 0; i<  RL_RX_DESC_CNT; i++) {
-               if (re_newbuf(sc, i, NULL) == ENOBUFS)
-                       return (ENOBUFS);
-       }
+       bzero(sc->rl_ldata.rl_rx_list, RL_RX_LIST_SZ);

        sc->rl_ldata.rl_rx_prodidx = 0;
+       sc->rl_ldata.rl_rx_considx = 0;
+       sc->rl_ldata.rl_rx_cnt = 0;
        sc->rl_head = sc->rl_tail = NULL;

+       re_rx_list_fill(sc);
+
        return (0);
  }

+void
+re_rx_list_fill(struct rl_softc *sc)
+{
+       while (sc->rl_ldata.rl_rx_cnt<  RL_RX_DESC_CNT) {
+               if (re_newbuf(sc) == ENOBUFS)
+                       break;
+       }
+}
+
  /*
   * RX handler for C+ and 8169. For the gigE chips, we support
   * the reception of jumbo frames that have been fragmented
@@ -1342,7 +1343,8 @@ re_rxeof(struct rl_softc *sc)

        ifp =&sc->sc_arpcom.ac_if;

-       for (i = sc->rl_ldata.rl_rx_prodidx;; i = RL_NEXT_RX_DESC(sc, i)) {
+       for (i = sc->rl_ldata.rl_rx_considx; sc->rl_ldata.rl_rx_cnt>  0;
+            i = RL_NEXT_RX_DESC(sc, i)) {
                cur_rx =&sc->rl_ldata.rl_rx_list[i];
                RL_RXDESCSYNC(sc, i,
                    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
@@ -1354,6 +1356,8 @@ re_rxeof(struct rl_softc *sc)
                total_len = rxstat&  sc->rl_rxlenmask;
                rxs =&sc->rl_ldata.rl_rxsoft[i];
                m = rxs->rxs_mbuf;
+               rxs->rxs_mbuf = NULL;
+               sc->rl_ldata.rl_rx_cnt--;
                rx = 1;

                /* Invalidate the RX mbuf and unload its map */
@@ -1372,7 +1376,6 @@ re_rxeof(struct rl_softc *sc)
                                sc->rl_tail->m_next = m;
                                sc->rl_tail = m;
                        }
-                       re_newbuf(sc, i, NULL);
                        continue;
                }

@@ -1410,22 +1413,6 @@ re_rxeof(struct rl_softc *sc)
                                m_freem(sc->rl_head);
                                sc->rl_head = sc->rl_tail = NULL;
                        }
-                       re_newbuf(sc, i, m);
-                       continue;
-               }
-
-               /*
-                * If allocating a replacement mbuf fails,
-                * reload the current one.
-                */
-
-               if (re_newbuf(sc, i, NULL)) {
-                       ifp->if_ierrors++;
-                       if (sc->rl_head != NULL) {
-                               m_freem(sc->rl_head);
-                               sc->rl_head = sc->rl_tail = NULL;
-                       }
-                       re_newbuf(sc, i, m);
                        continue;
                }

@@ -1503,7 +1490,8 @@ re_rxeof(struct rl_softc *sc)
                ether_input_mbuf(ifp, m);
        }

-       sc->rl_ldata.rl_rx_prodidx = i;
+       sc->rl_ldata.rl_rx_considx = i;
+       re_rx_list_fill(sc);

        return (rx);
  }
@@ -1617,21 +1605,17 @@ re_intr(void *arg)
                return (0);

        rx = tx = 0;
-       for (;;) {
-
-               status = CSR_READ_2(sc, RL_ISR);
-               /* If the card has gone away the read returns 0xffff. */
-               if (status == 0xffff)
-                       break;
-               if (status)
-                       CSR_WRITE_2(sc, RL_ISR, status);
-
-               if (status&  RL_ISR_TIMEOUT_EXPIRED)
-                       claimed = 1;
+       status = CSR_READ_2(sc, RL_ISR);
+       /* If the card has gone away the read returns 0xffff. */
+       if (status == 0xffff)
+               return (0);
+       if (status)
+               CSR_WRITE_2(sc, RL_ISR, status);

-               if ((status&  RL_INTRS_CPLUS) == 0)
-                       break;
+       if (status&  RL_ISR_TIMEOUT_EXPIRED)
+               claimed = 1;

+       if (status&  RL_INTRS_CPLUS) {
                if (status&  (sc->rl_rx_ack | RL_ISR_RX_ERR)) {
                        rx |= re_rxeof(sc);
                        claimed = 1;
Index: rtl81x9reg.h
===================================================================
RCS file: /cvs/src/sys/dev/ic/rtl81x9reg.h,v
retrieving revision 1.71
diff -u -p -r1.71 rtl81x9reg.h
--- rtl81x9reg.h        12 Nov 2010 22:17:30 -0000      1.71
+++ rtl81x9reg.h        27 Nov 2010 18:30:23 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: rtl81x9reg.h,v 1.71 2010/11/12 22:17:30 sthen Exp $   */
+/*     $OpenBSD: rtl81x9reg.h,v 1.70 2010/09/07 16:21:43 deraadt Exp $ */

  /*
   * Copyright (c) 1997, 1998
@@ -787,7 +787,9 @@ struct rl_list_data {
        struct rl_rxsoft        rl_rxsoft[RL_RX_DESC_CNT];
        bus_dmamap_t            rl_rx_list_map;
        struct rl_desc          *rl_rx_list;
+       int                     rl_rx_considx;
        int                     rl_rx_prodidx;
+       int                     rl_rx_cnt;
        bus_dma_segment_t       rl_rx_listseg;
        int                     rl_rx_listnseg;
  };

Reply via email to