Hi, 

This reduces tsec(4) TX interrupts by over a factor of four per interface,
boosting throughput by a couple of percent for

        $ dd if=/dev/zero bs=4096 | nc ${host} ${port}

It does this by reducing TX interrupt notifications to one per frame, from
one per mbuf fragment, and by enabling TX interrupt coalescing.

I've chosen conservative coalescing parameters. The card now interrupts every
four tx frames, leaving the tx ring fuller on average. But ample room remains
on the card's tx ring of 256 descriptors, which can hold 16 frames in the 
worst case of 16 mbuf fragments per frame. Testing showed descriptor use 
peaking at 13 descriptors under load.

The hold-off timer, ensuring stale frames are not left on the tx ring 
indefinitely, is not crucial for tx: as the frame has already been transmitted, 
latency isn't a concern. It need only last longer than the time to transmit the 
coalesced frames, and I've set it much longer, roughly 2ms for 1000baseT, 
to give the stack some slack when feeding the card.

While here, this also makes tsec_encap() error handling a tad more robust.

Tested on RB600A.

best, 
Richard. 

Index: if_tsec.c
===================================================================
RCS file: /cvs/src/sys/arch/socppc/dev/if_tsec.c,v
retrieving revision 1.39
diff -u -p -u -r1.39 if_tsec.c
--- if_tsec.c   6 Nov 2015 11:35:48 -0000       1.39
+++ if_tsec.c   10 Nov 2015 01:32:31 -0000
@@ -121,6 +121,8 @@ extern void myetheraddr(u_char *);
 #define TSEC_TCTRL             0x100
 #define TSEC_TSTAT             0x104
 #define  TSEC_TSTAT_THLT       0x80000000
+#define TSEC_TXIC              0x110
+#define  TSEC_TXIC_ICEN                0x80000000
 #define TSEC_TBPTR             0x184
 #define TSEC_TBASE             0x204
 
@@ -536,7 +538,7 @@ tsec_start(struct ifnet *ifp)
                        ifp->if_flags |= IFF_OACTIVE;
                        break;
                } 
-               if (error == EFBIG) {
+               if (error) {
                        IFQ_DEQUEUE(&ifp->if_snd, m);
                        m_freem(m); /* give up: drop it */
                        ifp->if_oerrors++;
@@ -1020,6 +1022,9 @@ tsec_up(struct tsec_softc *sc)
        attr |= TSEC_ATTR_RBDSEN;
        tsec_write(sc, TSEC_ATTR, attr);
 
+       /* TX interrupts every 4 TSEC_TX_I with ~2ms hold-off @ 1000baseT */
+       tsec_write(sc, TSEC_TXIC, (TSEC_TXIC_ICEN | (0x4 << 21) | 0x1000));
+
        tsec_write(sc, TSEC_TSTAT, TSEC_TSTAT_THLT);
        tsec_write(sc, TSEC_RSTAT, TSEC_RSTAT_QHLT);
 
@@ -1158,12 +1163,14 @@ tsec_encap(struct tsec_softc *sc, struct
        for (i = 0; i < map->dm_nsegs; i++) {
                status = txd->td_status & TSEC_TX_W;
                status |= TSEC_TX_TO1;
+               status |= TSEC_TX_TC;
                if (i == (map->dm_nsegs - 1))
-                       status |= TSEC_TX_L;
+                       status |= TSEC_TX_L | TSEC_TX_I;
+
                txd->td_len = map->dm_segs[i].ds_len;
                txd->td_addr = map->dm_segs[i].ds_addr;
                __asm volatile("eieio" ::: "memory");
-               txd->td_status = status | TSEC_TX_R | TSEC_TX_I | TSEC_TX_TC;
+               txd->td_status = status | TSEC_TX_R;
 
                bus_dmamap_sync(sc->sc_dmat, TSEC_DMA_MAP(sc->sc_txring),
                    frag * sizeof(*txd), sizeof(*txd), BUS_DMASYNC_PREWRITE);

Reply via email to