Darren Tucker's vlan tagging for vr motivated me. Here is a diff that 
implements transmit DMA segments, instead of copying fragmented mbufs every 
time. This should be a win for userland traffic, and NFS. It also implements a 
FreeBSD feature to only ask for TX completion interrupts every 8 packets, 
instead of every packet, which is another win for weak CPUs. FreeBSD has been 
doing DMA tx segments and 1/8 completion interrupts for 4 years across the same 
chips. Annoyingly, at first glance, the rhine chip still seems to send the same 
number of completion interrupts. But it's clear that bus_dmamap_load_mbuf no 
longer fails at the top of vr_encap on packets with 8 or fewer mbuf fragments, 
avoiding the whole new mbuf and m_copydata dance for the majority of situations 
now. The next win would be to copy reyk's method from if_myx to create a new 
DMA segment for padding packets < VR_MIN_FRAMELEN instead of creating a whole 
new mbuf and copying. Micro-optimizations for micro-platforms.

This is heavily influenced by yongari@FreeBSD's work 4 years ago. (In fact, 
maybe too much so. As far as I can tell, allowing for DMA transfers of MCLBYTES 
* VR_MAXFRAGS is overkill since a packet over the size of MCLBYTES isn't even 
possible with this chip. Also, failure returns from vr_encap are now ENOBUFS, but the 
error gets ignored upstream at this point.)

Index: if_vr.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_vr.c,v
retrieving revision 1.115
diff -u -r1.115 if_vr.c
--- if_vr.c     18 Sep 2012 14:49:44 -0000      1.115
+++ if_vr.c     4 Oct 2012 17:12:08 -0000
@@ -113,7 +113,7 @@
        NULL, "vr", DV_IFNET
 };
 
-int vr_encap(struct vr_softc *, struct vr_chain *, struct mbuf *);
+int vr_encap(struct vr_softc *, struct vr_chain **, struct mbuf *);
 void vr_rxeof(struct vr_softc *);
 void vr_rxeoc(struct vr_softc *);
 void vr_txeof(struct vr_softc *);
@@ -720,13 +720,17 @@
 
        cd = &sc->vr_cdata;
        ld = sc->vr_ldata;
+
+       cd->vr_tx_pkts = 0;
+       cd->vr_tx_cnt = 0;
+
        for (i = 0; i < VR_TX_LIST_CNT; i++) {
                cd->vr_tx_chain[i].vr_ptr = &ld->vr_tx_list[i];
                cd->vr_tx_chain[i].vr_paddr =
                    sc->sc_listmap->dm_segs[0].ds_addr +
                    offsetof(struct vr_list_data, vr_tx_list[i]);
 
-               if (bus_dmamap_create(sc->sc_dmat, MCLBYTES, 1,
+               if (bus_dmamap_create(sc->sc_dmat, MCLBYTES * VR_MAXFRAGS, 
VR_MAXFRAGS,
                    MCLBYTES, 0, BUS_DMA_NOWAIT, &cd->vr_tx_chain[i].vr_map))
                        return (ENOBUFS);
 
@@ -984,11 +988,13 @@
         * frames that have been transmitted.
         */
        cur_tx = sc->vr_cdata.vr_tx_cons;
-       while(cur_tx->vr_mbuf != NULL) {
-               u_int32_t               txstat;
+       while (cur_tx != sc->vr_cdata.vr_tx_prod) {
+           
+               u_int32_t               txstat, txctl;
                int                     i;
 
                txstat = letoh32(cur_tx->vr_ptr->vr_status);
+               txctl = letoh32(cur_tx->vr_ptr->vr_ctl);
 
                if ((txstat & VR_TXSTAT_ABRT) ||
                    (txstat & VR_TXSTAT_UDF)) {
@@ -1002,7 +1008,7 @@
                                sc->vr_flags |= VR_F_RESTART;
                                break;
                        }
-                       VR_TXOWN(cur_tx) = htole32(VR_TXSTAT_OWN);
+                       cur_tx->vr_ptr->vr_status = htole32(VR_TXSTAT_OWN);
                        CSR_WRITE_4(sc, VR_TXADDR, cur_tx->vr_paddr);
                        break;
                }
@@ -1010,6 +1016,11 @@
                if (txstat & VR_TXSTAT_OWN)
                        break;
 
+               sc->vr_cdata.vr_tx_cnt--;
+               /* Only the first descriptor in the chain is valid. */
+               if ((txctl & VR_TXCTL_FIRSTFRAG) == 0)
+                       goto next;
+
                if (txstat & VR_TXSTAT_ERRSUM) {
                        ifp->if_oerrors++;
                        if (txstat & VR_TXSTAT_DEFER)
@@ -1028,11 +1039,12 @@
                cur_tx->vr_mbuf = NULL;
                ifp->if_flags &= ~IFF_OACTIVE;
 
+next:
                cur_tx = cur_tx->vr_nextdesc;
        }
 
        sc->vr_cdata.vr_tx_cons = cur_tx;
-       if (cur_tx->vr_mbuf == NULL)
+       if (sc->vr_cdata.vr_tx_cnt == 0)
                ifp->if_timer = 0;
 }
 
@@ -1164,19 +1176,22 @@
  * pointers to the fragment pointers.
  */
 int
-vr_encap(struct vr_softc *sc, struct vr_chain *c, struct mbuf *m_head)
+vr_encap(struct vr_softc *sc, struct vr_chain **cp, struct mbuf *m_head)
 {
+       struct vr_chain         *c = *cp;
        struct vr_desc          *f = NULL;
        struct mbuf             *m_new = NULL;
-       u_int32_t               vr_flags = 0, vr_status = 0;
+       u_int32_t               vr_ctl = 0, vr_status = 0;
+       bus_dmamap_t            txmap;
+       int                     i;
 
        if (sc->vr_quirks & VR_Q_CSUM) {
                if (m_head->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
-                       vr_flags |= VR_TXCTL_IPCSUM;
+                       vr_ctl |= VR_TXCTL_IPCSUM;
                if (m_head->m_pkthdr.csum_flags & M_TCP_CSUM_OUT)
-                       vr_flags |= VR_TXCTL_TCPCSUM;
+                       vr_ctl |= VR_TXCTL_TCPCSUM;
                if (m_head->m_pkthdr.csum_flags & M_UDP_CSUM_OUT)
-                       vr_flags |= VR_TXCTL_UDPCSUM;
+                       vr_ctl |= VR_TXCTL_UDPCSUM;
        }
 
        if (sc->vr_quirks & VR_Q_NEEDALIGN ||
@@ -1213,28 +1228,51 @@
                if (bus_dmamap_load_mbuf(sc->sc_dmat, c->vr_map, m_new,
                    BUS_DMA_NOWAIT | BUS_DMA_WRITE)) {
                        m_freem(m_new);
-                       return (1);
+                       return(ENOBUFS);
                }
+               c->vr_mbuf = m_new;
+       } else {
+               c->vr_mbuf = m_head;
        }
 
        bus_dmamap_sync(sc->sc_dmat, c->vr_map, 0, c->vr_map->dm_mapsize,
            BUS_DMASYNC_PREWRITE);
 
-       if (m_new != NULL) {
+       /* Check number of available descriptors. */
+       if (sc->vr_cdata.vr_tx_cnt + c->vr_map->dm_nsegs >= (VR_TX_LIST_CNT - 
1)) {
+               printf("vr_tx_cnt %i dm_nsegs %i\n", sc->vr_cdata.vr_tx_cnt, 
c->vr_map->dm_nsegs);
+               bus_dmamap_unload(sc->sc_dmat, c->vr_map);
+               if (m_new)
+                       m_freem(m_new);
+               return(ENOBUFS);
+       }
+       if (m_new)
                m_freem(m_head);
 
-               c->vr_mbuf = m_new;
-       } else
-               c->vr_mbuf = m_head;
-
-       f = c->vr_ptr;
-       f->vr_data = htole32(c->vr_map->dm_segs[0].ds_addr);
-       f->vr_ctl = htole32(c->vr_map->dm_mapsize);
-       f->vr_ctl |= htole32(vr_flags|VR_TXCTL_TLINK|VR_TXCTL_FIRSTFRAG);
-       f->vr_status = htole32(vr_status);
+       txmap = c->vr_map;
+       for (i = 0; i < txmap->dm_nsegs; i++) {
+               if (i != 0)
+                       *cp = c = c->vr_nextdesc;
+               f = c->vr_ptr;
+               f->vr_ctl = htole32(txmap->dm_segs[i].ds_len | VR_TXCTL_TLINK |
+                   vr_ctl);
+               if (i == 0)
+                       f->vr_ctl |= htole32(VR_TXCTL_FIRSTFRAG);
+               f->vr_status = htole32(vr_status);
+               f->vr_data = htole32(txmap->dm_segs[i].ds_addr);
+               f->vr_next = htole32(c->vr_nextdesc->vr_paddr);
+               sc->vr_cdata.vr_tx_cnt++;
+       }
 
-       f->vr_ctl |= htole32(VR_TXCTL_LASTFRAG|VR_TXCTL_FINT);
-       f->vr_next = htole32(c->vr_nextdesc->vr_paddr);
+       /*
+        * Set EOP on the last desciptor and request Tx completion
+        * interrupt for every VR_TX_INTR_THRESH-th frames.
+        */
+       VR_INC(sc->vr_cdata.vr_tx_pkts, VR_TX_INTR_THRESH);
+       if (sc->vr_cdata.vr_tx_pkts == 0)
+               f->vr_ctl |= htole32(VR_TXCTL_LASTFRAG | VR_TXCTL_FINT);
+       else
+               f->vr_ctl |= htole32(VR_TXCTL_LASTFRAG);
 
        return (0);
 }
@@ -1251,7 +1289,7 @@
 {
        struct vr_softc         *sc;
        struct mbuf             *m_head;
-       struct vr_chain         *cur_tx;
+       struct vr_chain         *cur_tx, *head_tx;
 
        sc = ifp->if_softc;
 
@@ -1265,7 +1303,8 @@
                        break;
 
                /* Pack the data into the descriptor. */
-               if (vr_encap(sc, cur_tx, m_head)) {
+               head_tx = cur_tx;
+               if (vr_encap(sc, &cur_tx, m_head)) {
                        /* Rollback, send what we were able to encap. */
                        if (ALTQ_IS_ENABLED(&ifp->if_snd))
                                m_freem(m_head);
@@ -1274,7 +1313,8 @@
                        break;
                }
 
-               VR_TXOWN(cur_tx) = htole32(VR_TXSTAT_OWN);
+               /* Only set ownership bit on first descriptor */
+               head_tx->vr_ptr->vr_status |= htole32(VR_TXSTAT_OWN);
 
 #if NBPFILTER > 0
                /*
@@ -1282,12 +1322,12 @@
                 * to him.
                 */
                if (ifp->if_bpf)
-                       bpf_mtap_ether(ifp->if_bpf, cur_tx->vr_mbuf,
+                       bpf_mtap_ether(ifp->if_bpf, head_tx->vr_mbuf,
                        BPF_DIRECTION_OUT);
 #endif
                cur_tx = cur_tx->vr_nextdesc;
        }
-       if (cur_tx != sc->vr_cdata.vr_tx_prod || cur_tx->vr_mbuf != NULL) {
+       if (sc->vr_cdata.vr_tx_cnt != 0) {
                sc->vr_cdata.vr_tx_prod = cur_tx;
 
                bus_dmamap_sync(sc->sc_dmat, sc->sc_listmap, 0,
@@ -1513,6 +1553,15 @@
        struct vr_softc         *sc;
 
        sc = ifp->if_softc;
+
+       /*
+        * Reclaim first as we don't request interrupt for every packet.
+        */
+       printf("vr_watchdog vr_tx_cnt %i\n",sc->vr_cdata.vr_tx_cnt);
+       vr_txeof(sc);
+       printf("vr_watchdog vr_tx_cnt %i (after 
txeof)\n",sc->vr_cdata.vr_tx_cnt);
+       if (sc->vr_cdata.vr_tx_cnt == 0)
+               return;
 
        ifp->if_oerrors++;
        printf("%s: watchdog timeout\n", sc->sc_dev.dv_xname);
Index: if_vrreg.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_vrreg.h,v
retrieving revision 1.30
diff -u -r1.30 if_vrreg.h
--- if_vrreg.h  5 Jan 2012 19:08:25 -0000       1.30
+++ if_vrreg.h  4 Oct 2012 17:12:08 -0000
@@ -417,13 +417,14 @@
 #define VR_TXCTL_LASTFRAG      0x00400000
 #define VR_TXCTL_FINT          0x00800000
 
-#define VR_MAXFRAGS            16
-#define VR_RX_LIST_CNT         64
+#define VR_MAXFRAGS            8
+#define VR_TX_INTR_THRESH      8
+#define VR_RX_LIST_CNT         128
 #define VR_TX_LIST_CNT         128
 #define VR_MIN_FRAMELEN                60
 #define VR_RXLEN               1524
 
-#define VR_TXOWN(x)            x->vr_ptr->vr_status
+#define VR_INC(x,y)            (x) = (((x) + 1) % y)
 
 struct vr_list_data {
        struct vr_desc          vr_rx_list[VR_RX_LIST_CNT];
@@ -456,6 +457,8 @@
 
        struct vr_chain         *vr_tx_cons;
        struct vr_chain         *vr_tx_prod;
+       int                     vr_tx_cnt;
+       int                     vr_tx_pkts;
 };
 
 struct vr_mii_frame {

Reply via email to