On 05/10/15(Mon) 12:42, Jonathan Matthew wrote:
> This brings bge(4) up to about where em(4) is.  It involves a few different
> changes, notably:
> - per-ring refill timeouts, to ensure we don't try to refill a ring from the
> timeout and an interrupt at the same time
> - removing the list of tx dma maps and just assigning a map to use based
> on the current ring slot number (this is why it's more - than +)
> - using atomics to adjust bge_txcnt, saving those adjustments until the
> end of the tx/txeof loops, and only acting on the adjusted value
> - not adding any mutexes
> 
> I've tested it on amd64 with these:
> 
> bge0 at pci1 dev 0 function 0 "Broadcom BCM5721" rev 0x21, BCM5750 C1 
> (0x4201): msi, address 00:18:f3:d1:80:64
> 
> bge0 at pci2 dev 2 function 0 "Broadcom BCM5703X" rev 0x02, BCM5702/5703 A2 
> (0x1002): apic 3 int 1, address 00:09:3d:00:84:d1
> 
> and on sparc64:
> 
> bge0 at pci7 dev 4 function 0 "Broadcom BCM5714" rev 0xa3, BCM5715 A3 
> (0x9003): ivec 0x795, address 00:14:4f:00:5a:5a
> 
> On the sparc64 box (a v245), with if_input_process unlocked, this gets me 
> 10-20%
> more pps or about 100mbps more in tcpbench (550, up from 450).
> 
> ok?

ok mpi@

> 
> Index: if_bgereg.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_bgereg.h,v
> retrieving revision 1.127
> diff -u -p -u -p -r1.127 if_bgereg.h
> --- if_bgereg.h       11 Sep 2015 13:02:28 -0000      1.127
> +++ if_bgereg.h       30 Sep 2015 11:14:09 -0000
> @@ -2830,11 +2830,6 @@ struct bge_type {
>  #define      BGE_TIMEOUT             100000
>  #define      BGE_TXCONS_UNSET                0xFFFF  /* impossible value */
>  
> -struct txdmamap_pool_entry {
> -     bus_dmamap_t dmamap;
> -     SLIST_ENTRY(txdmamap_pool_entry) link;
> -};
> -
>  #define      ASF_ENABLE              1
>  #define      ASF_NEW_HANDSHAKE       2
>  #define      ASF_STACKUP             4
> @@ -2934,11 +2929,11 @@ struct bge_softc {
>       int                     bge_txcnt;
>       struct timeout          bge_timeout;
>       struct timeout          bge_rxtimeout;
> +     struct timeout          bge_rxtimeout_jumbo;
>       u_int32_t               bge_rx_discards;
>       u_int32_t               bge_tx_discards;
>       u_int32_t               bge_rx_inerrors;
>       u_int32_t               bge_rx_overruns;
>       u_int32_t               bge_tx_collisions;
> -     SLIST_HEAD(, txdmamap_pool_entry) txdma_list;
> -     struct txdmamap_pool_entry *txdma[BGE_TX_RING_CNT];
> +     bus_dmamap_t            bge_txdma[BGE_TX_RING_CNT];
>  };
> Index: if_bge.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_bge.c,v
> retrieving revision 1.369
> diff -u -p -u -p -r1.369 if_bge.c
> --- if_bge.c  19 Jul 2015 06:28:12 -0000      1.369
> +++ if_bge.c  5 Oct 2015 01:06:21 -0000
> @@ -141,7 +141,7 @@ void bge_tick(void *);
>  void bge_stats_update(struct bge_softc *);
>  void bge_stats_update_regs(struct bge_softc *);
>  int bge_cksum_pad(struct mbuf *);
> -int bge_encap(struct bge_softc *, struct mbuf *, u_int32_t *);
> +int bge_encap(struct bge_softc *, struct mbuf *, int *);
>  int bge_compact_dma_runt(struct mbuf *);
>  
>  int bge_intr(void *);
> @@ -1262,20 +1262,31 @@ uncreate:
>       return (1);
>  }
>  
> +/*
> + * When the refill timeout for a ring is active, that ring is so empty
> + * that no more packets can be received on it, so the interrupt handler
> + * will not attempt to refill it, meaning we don't need to protect against
> + * interrupts here.
> + */
> +
>  void
>  bge_rxtick(void *arg)
>  {
>       struct bge_softc *sc = arg;
> -     int s;
>  
> -     s = splnet();
>       if (ISSET(sc->bge_flags, BGE_RXRING_VALID) &&
>           if_rxr_inuse(&sc->bge_std_ring) <= 8)
>               bge_fill_rx_ring_std(sc);
> +}
> +
> +void
> +bge_rxtick_jumbo(void *arg)
> +{
> +     struct bge_softc *sc = arg;
> +
>       if (ISSET(sc->bge_flags, BGE_JUMBO_RXRING_VALID) &&
>           if_rxr_inuse(&sc->bge_jumbo_ring) <= 8)
>               bge_fill_rx_ring_jumbo(sc);
> -     splx(s);
>  }
>  
>  void
> @@ -1410,7 +1421,7 @@ bge_fill_rx_ring_jumbo(struct bge_softc 
>        * that now, then try again later.
>        */
>       if (if_rxr_inuse(&sc->bge_jumbo_ring) <= 8)
> -             timeout_add(&sc->bge_rxtimeout, 1);
> +             timeout_add(&sc->bge_rxtimeout_jumbo, 1);
>  }
>  
>  void
> @@ -1446,7 +1457,6 @@ void
>  bge_free_tx_ring(struct bge_softc *sc)
>  {
>       int i;
> -     struct txdmamap_pool_entry *dma;
>  
>       if (!(sc->bge_flags & BGE_TXRING_VALID))
>               return;
> @@ -1455,18 +1465,12 @@ bge_free_tx_ring(struct bge_softc *sc)
>               if (sc->bge_cdata.bge_tx_chain[i] != NULL) {
>                       m_freem(sc->bge_cdata.bge_tx_chain[i]);
>                       sc->bge_cdata.bge_tx_chain[i] = NULL;
> -                     SLIST_INSERT_HEAD(&sc->txdma_list, sc->txdma[i],
> -                                         link);
> -                     sc->txdma[i] = 0;
> +                     sc->bge_cdata.bge_tx_map[i] = NULL;
>               }
>               bzero(&sc->bge_rdata->bge_tx_ring[i],
>                   sizeof(struct bge_tx_bd));
> -     }
>  
> -     while ((dma = SLIST_FIRST(&sc->txdma_list))) {
> -             SLIST_REMOVE_HEAD(&sc->txdma_list, link);
> -             bus_dmamap_destroy(sc->bge_dmatag, dma->dmamap);
> -             free(dma, M_DEVBUF, 0);
> +             bus_dmamap_destroy(sc->bge_dmatag, sc->bge_txdma[i]);
>       }
>  
>       sc->bge_flags &= ~BGE_TXRING_VALID;
> @@ -1476,9 +1480,7 @@ int
>  bge_init_tx_ring(struct bge_softc *sc)
>  {
>       int i;
> -     bus_dmamap_t dmamap;
>       bus_size_t txsegsz, txmaxsegsz;
> -     struct txdmamap_pool_entry *dma;
>  
>       if (sc->bge_flags & BGE_TXRING_VALID)
>               return (0);
> @@ -1505,22 +1507,10 @@ bge_init_tx_ring(struct bge_softc *sc)
>               txmaxsegsz = MCLBYTES;
>       }
>  
> -     SLIST_INIT(&sc->txdma_list);
>       for (i = 0; i < BGE_TX_RING_CNT; i++) {
>               if (bus_dmamap_create(sc->bge_dmatag, txmaxsegsz,
> -                 BGE_NTXSEG, txsegsz, 0, BUS_DMA_NOWAIT, &dmamap))
> +                 BGE_NTXSEG, txsegsz, 0, BUS_DMA_NOWAIT, &sc->bge_txdma[i]))
>                       return (ENOBUFS);
> -             if (dmamap == NULL)
> -                     panic("dmamap NULL in bge_init_tx_ring");
> -             dma = malloc(sizeof(*dma), M_DEVBUF, M_NOWAIT);
> -             if (dma == NULL) {
> -                     printf("%s: can't alloc txdmamap_pool_entry\n",
> -                         sc->bge_dev.dv_xname);
> -                     bus_dmamap_destroy(sc->bge_dmatag, dmamap);
> -                     return (ENOMEM);
> -             }
> -             dma->dmamap = dmamap;
> -             SLIST_INSERT_HEAD(&sc->txdma_list, dma, link);
>       }
>  
>       sc->bge_flags |= BGE_TXRING_VALID;
> @@ -3081,8 +3071,8 @@ bge_attach(struct device *parent, struct
>  
>       /* Hookup IRQ last. */
>       DPRINTFN(5, ("pci_intr_establish\n"));
> -     sc->bge_intrhand = pci_intr_establish(pc, ih, IPL_NET, bge_intr, sc,
> -         sc->bge_dev.dv_xname);
> +     sc->bge_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> +         bge_intr, sc, sc->bge_dev.dv_xname);
>       if (sc->bge_intrhand == NULL) {
>               printf(": couldn't establish interrupt");
>               if (intrstr != NULL)
> @@ -3139,6 +3129,7 @@ bge_attach(struct device *parent, struct
>  
>       timeout_set(&sc->bge_timeout, bge_tick, sc);
>       timeout_set(&sc->bge_rxtimeout, bge_rxtick, sc);
> +     timeout_set(&sc->bge_rxtimeout_jumbo, bge_rxtick_jumbo, sc);
>       return;
>  
>  fail_6:
> @@ -3578,15 +3569,17 @@ bge_txeof(struct bge_softc *sc)
>  {
>       struct bge_tx_bd *cur_tx = NULL;
>       struct ifnet *ifp;
> -     struct txdmamap_pool_entry *dma;
> +     bus_dmamap_t dmamap;
>       bus_addr_t offset, toff;
>       bus_size_t tlen;
> -     int tosync;
> +     int tosync, freed, txcnt;
> +     u_int32_t cons, newcons;
>       struct mbuf *m;
>  
>       /* Nothing to do */
> -     if (sc->bge_tx_saved_considx ==
> -         sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx)
> +     cons = sc->bge_tx_saved_considx; 
> +     newcons = sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx;
> +     if (cons == newcons)
>               return;
>  
>       ifp = &sc->arpcom.ac_if;
> @@ -3597,14 +3590,12 @@ bge_txeof(struct bge_softc *sc)
>           BUS_DMASYNC_POSTREAD);
>  
>       offset = offsetof(struct bge_ring_data, bge_tx_ring);
> -     tosync = sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx -
> -         sc->bge_tx_saved_considx;
> +     tosync = newcons - cons;
>  
> -     toff = offset + (sc->bge_tx_saved_considx * sizeof (struct bge_tx_bd));
> +     toff = offset + (cons * sizeof (struct bge_tx_bd));
>  
>       if (tosync < 0) {
> -             tlen = (BGE_TX_RING_CNT - sc->bge_tx_saved_considx) *
> -                 sizeof (struct bge_tx_bd);
> +             tlen = (BGE_TX_RING_CNT - cons) * sizeof (struct bge_tx_bd);
>               bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
>                   toff, tlen, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
>               tosync = -tosync;
> @@ -3618,34 +3609,35 @@ bge_txeof(struct bge_softc *sc)
>        * Go through our tx ring and free mbufs for those
>        * frames that have been sent.
>        */
> -     while (sc->bge_tx_saved_considx !=
> -         sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx) {
> -             u_int32_t               idx = 0;
> -
> -             idx = sc->bge_tx_saved_considx;
> -             cur_tx = &sc->bge_rdata->bge_tx_ring[idx];
> +     freed = 0;
> +     while (cons != newcons) {
> +             cur_tx = &sc->bge_rdata->bge_tx_ring[cons];
>               if (cur_tx->bge_flags & BGE_TXBDFLAG_END)
>                       ifp->if_opackets++;
> -             m = sc->bge_cdata.bge_tx_chain[idx];
> +             m = sc->bge_cdata.bge_tx_chain[cons];
>               if (m != NULL) {
> -                     sc->bge_cdata.bge_tx_chain[idx] = NULL;
> -                     dma = sc->txdma[idx];
> -                     bus_dmamap_sync(sc->bge_dmatag, dma->dmamap, 0,
> -                         dma->dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
> -                     bus_dmamap_unload(sc->bge_dmatag, dma->dmamap);
> -                     SLIST_INSERT_HEAD(&sc->txdma_list, dma, link);
> -                     sc->txdma[idx] = NULL;
> +                     dmamap = sc->bge_cdata.bge_tx_map[cons];
> +
> +                     sc->bge_cdata.bge_tx_chain[cons] = NULL;
> +                     sc->bge_cdata.bge_tx_map[cons] = NULL;
> +                     bus_dmamap_sync(sc->bge_dmatag, dmamap, 0,
> +                         dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
> +                     bus_dmamap_unload(sc->bge_dmatag, dmamap);
>  
>                       m_freem(m);
>               }
> -             sc->bge_txcnt--;
> -             BGE_INC(sc->bge_tx_saved_considx, BGE_TX_RING_CNT);
> +             freed++;
> +             BGE_INC(cons, BGE_TX_RING_CNT);
>       }
>  
> -     if (sc->bge_txcnt < BGE_TX_RING_CNT - 16)
> +     txcnt = atomic_sub_int_nv(&sc->bge_txcnt, freed);
> +
> +     if (txcnt < BGE_TX_RING_CNT - 16)
>               ifp->if_flags &= ~IFF_OACTIVE;
> -     if (sc->bge_txcnt == 0)
> +     if (txcnt == 0)
>               ifp->if_timer = 0;
> +
> +     sc->bge_tx_saved_considx = cons;
>  }
>  
>  int
> @@ -3693,8 +3685,11 @@ bge_intr(void *xsc)
>  
>       if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5700 ||
>           statusword & BGE_STATFLAG_LINKSTATE_CHANGED ||
> -         BGE_STS_BIT(sc, BGE_STS_LINK_EVT))
> +         BGE_STS_BIT(sc, BGE_STS_LINK_EVT)) {
> +             KERNEL_LOCK();
>               bge_link_upd(sc);
> +             KERNEL_UNLOCK();
> +     }
>  
>       /* Re-enable interrupts. */
>       bge_writembx(sc, BGE_MBX_IRQ0_LO, statustag);
> @@ -3706,8 +3701,11 @@ bge_intr(void *xsc)
>               /* Check TX ring producer/consumer */
>               bge_txeof(sc);
>  
> -             if (!IFQ_IS_EMPTY(&ifp->if_snd))
> +             if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
> +                     KERNEL_LOCK();
>                       bge_start(ifp);
> +                     KERNEL_UNLOCK();
> +             }
>       }
>  
>       return (1);
> @@ -3987,16 +3985,15 @@ bge_cksum_pad(struct mbuf *m)
>   * pointers to descriptors.
>   */
>  int
> -bge_encap(struct bge_softc *sc, struct mbuf *m_head, u_int32_t *txidx)
> +bge_encap(struct bge_softc *sc, struct mbuf *m_head, int *txinc)
>  {
>       struct bge_tx_bd        *f = NULL;
>       u_int32_t               frag, cur;
>       u_int16_t               csum_flags = 0;
> -     struct txdmamap_pool_entry *dma;
> -     bus_dmamap_t dmamap;
> +     bus_dmamap_t            dmamap;
>       int                     i = 0;
>  
> -     cur = frag = *txidx;
> +     cur = frag = (sc->bge_tx_prodidx + *txinc) % BGE_TX_RING_CNT;
>  
>       if (m_head->m_pkthdr.csum_flags) {
>               if (m_head->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
> @@ -4026,10 +4023,7 @@ bge_encap(struct bge_softc *sc, struct m
>               return (ENOBUFS);
>  
>  doit:
> -     dma = SLIST_FIRST(&sc->txdma_list);
> -     if (dma == NULL)
> -             return (ENOBUFS);
> -     dmamap = dma->dmamap;
> +     dmamap = sc->bge_txdma[cur];
>  
>       /*
>        * Start packing the mbufs in this chain into
> @@ -4052,7 +4046,7 @@ doit:
>       }
>  
>       /* Check if we have enough free send BDs. */
> -     if (sc->bge_txcnt + dmamap->dm_nsegs >= BGE_TX_RING_CNT)
> +     if (sc->bge_txcnt + *txinc + dmamap->dm_nsegs >= BGE_TX_RING_CNT)
>               goto fail_unload;
>  
>       for (i = 0; i < dmamap->dm_nsegs; i++) {
> @@ -4084,11 +4078,9 @@ doit:
>  
>       sc->bge_rdata->bge_tx_ring[cur].bge_flags |= BGE_TXBDFLAG_END;
>       sc->bge_cdata.bge_tx_chain[cur] = m_head;
> -     SLIST_REMOVE_HEAD(&sc->txdma_list, link);
> -     sc->txdma[cur] = dma;
> -     sc->bge_txcnt += dmamap->dm_nsegs;
> -
> -     *txidx = frag;
> +     sc->bge_cdata.bge_tx_map[cur] = dmamap;
> +     
> +     *txinc += dmamap->dm_nsegs;
>  
>       return (0);
>  
> @@ -4107,8 +4099,7 @@ bge_start(struct ifnet *ifp)
>  {
>       struct bge_softc *sc;
>       struct mbuf *m_head;
> -     u_int32_t prodidx;
> -     int pkts;
> +     int txinc;
>  
>       sc = ifp->if_softc;
>  
> @@ -4117,55 +4108,44 @@ bge_start(struct ifnet *ifp)
>       if (!BGE_STS_BIT(sc, BGE_STS_LINK))
>               return;
>  
> -     prodidx = sc->bge_tx_prodidx;
> -
> -     for (pkts = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
> -             if (sc->bge_txcnt > BGE_TX_RING_CNT - 16) {
> -                     ifp->if_flags |= IFF_OACTIVE;
> -                     break;
> -             }
> -
> +     txinc = 0;
> +     while (1) {
>               IFQ_POLL(&ifp->if_snd, m_head);
>               if (m_head == NULL)
>                       break;
>  
> -             /*
> -              * Pack the data into the transmit ring. If we
> -              * don't have room, set the OACTIVE flag and wait
> -              * for the NIC to drain the ring.
> -              */
> -             if (bge_encap(sc, m_head, &prodidx)) {
> -                     ifp->if_flags |= IFF_OACTIVE;
> +             if (bge_encap(sc, m_head, &txinc))
>                       break;
> -             }
>  
>               /* now we are committed to transmit the packet */
>               IFQ_DEQUEUE(&ifp->if_snd, m_head);
> -             pkts++;
>  
>  #if NBPFILTER > 0
> -             /*
> -              * If there's a BPF listener, bounce a copy of this frame
> -              * to him.
> -              */
>               if (ifp->if_bpf)
>                       bpf_mtap_ether(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
>  #endif
>       }
> -     if (pkts == 0)
> -             return;
>  
> -     /* Transmit */
> -     bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx);
> -     if (BGE_CHIPREV(sc->bge_chipid) == BGE_CHIPREV_5700_BX)
> -             bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx);
> +     if (txinc != 0) {
> +             int txcnt;
>  
> -     sc->bge_tx_prodidx = prodidx;
> +             /* Transmit */
> +             sc->bge_tx_prodidx = (sc->bge_tx_prodidx + txinc) %
> +                 BGE_TX_RING_CNT;
> +             bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, sc->bge_tx_prodidx);
> +             if (BGE_CHIPREV(sc->bge_chipid) == BGE_CHIPREV_5700_BX)
> +                     bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO,
> +                         sc->bge_tx_prodidx);
>  
> -     /*
> -      * Set a timeout in case the chip goes out to lunch.
> -      */
> -     ifp->if_timer = 5;
> +             txcnt = atomic_add_int_nv(&sc->bge_txcnt, txinc);
> +             if (txcnt > BGE_TX_RING_CNT - 16)
> +                     ifp->if_flags |= IFF_OACTIVE;
> +
> +             /*
> +              * Set a timeout in case the chip goes out to lunch.
> +              */
> +             ifp->if_timer = 5;
> +     }
>  }
>  
>  void
> @@ -4580,6 +4560,7 @@ bge_stop(struct bge_softc *sc)
>  
>       timeout_del(&sc->bge_timeout);
>       timeout_del(&sc->bge_rxtimeout);
> +     timeout_del(&sc->bge_rxtimeout_jumbo);
>  
>       ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
>  
> @@ -4639,6 +4620,8 @@ bge_stop(struct bge_softc *sc)
>        * Tell firmware we're shutting down.
>        */
>       BGE_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
> +
> +     intr_barrier(sc->bge_intrhand);
>  
>       /* Free the RX lists. */
>       bge_free_rx_ring_std(sc);
> 

  • unlock bge(4) Jonathan Matthew
    • Re: unlock bge(4) Martin Pieuchot

Reply via email to