On 05/12/15(Sat) 15:41, Claudio Jeker wrote: > So Mark and I spent some time to figure out what the issue was with ix(4) > based on that info I resurected the em(4) mpsafe diff that got backed out > and I applied the same fix. It is somewhat unclear if this fixes the > watchdog timeouts since in theory the wdog timer should be stopped when > hitting the race condition we hit in ix(4). > > I'm currently hammering my test system with this and until now I have not > seen a watchdog fired.
It is the right time to get this in. ok mpi@ > -- > :wq Claudio > > Index: pci/if_em.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_em.c,v > retrieving revision 1.313 > diff -u -p -r1.313 if_em.c > --- pci/if_em.c 25 Nov 2015 03:09:59 -0000 1.313 > +++ pci/if_em.c 3 Dec 2015 22:08:54 -0000 > @@ -612,6 +612,16 @@ em_start(struct ifnet *ifp) > if (em_encap(sc, m_head)) { > ifq_deq_rollback(&ifp->if_snd, m_head); > ifq_set_oactive(&ifp->if_snd); > + /* > + * Make sure there are still packets on the > + * ring. The interrupt handler may have > + * cleaned up the ring before we were able to > + * set the IF_OACTIVE flag. > + */ > + if (sc->num_tx_desc_avail == sc->num_tx_desc) { > + ifq_clr_oactive(&ifp->if_snd); > + continue; > + } > break; > } > > @@ -918,20 +928,17 @@ em_intr(void *arg) > if (reg_icr & E1000_ICR_RXO) > sc->rx_overruns++; > > - KERNEL_LOCK(); > - > /* Link status change */ > if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { > + KERNEL_LOCK(); > sc->hw.get_link_status = 1; > em_check_for_link(&sc->hw); > em_update_link_status(sc); > + if (!IFQ_IS_EMPTY(&ifp->if_snd)) > + em_start(ifp); > + KERNEL_UNLOCK(); > } > > - if (ifp->if_flags & IFF_RUNNING && !IFQ_IS_EMPTY(&ifp->if_snd)) > - em_start(ifp); > - > - KERNEL_UNLOCK(); > - > if (refill && em_rxfill(sc)) { > /* Advance the Rx Queue #0 "Tail Pointer". */ > E1000_WRITE_REG(&sc->hw, RDT, sc->last_rx_desc_filled); > @@ -1108,17 +1115,10 @@ em_encap(struct em_softc *sc, struct mbu > struct em_buffer *tx_buffer, *tx_buffer_mapped; > struct em_tx_desc *current_tx_desc = NULL; > > - /* > - * Force a cleanup if number of TX descriptors > - * available hits the threshold > - */ > - if (sc->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) { > - em_txeof(sc); > - /* Now do we at least have a minimal? */ > - if (sc->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) { > - sc->no_tx_desc_avail1++; > - return (ENOBUFS); > - } > + /* Check that we have least the minimal number of TX descriptors. */ > + if (sc->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) { > + sc->no_tx_desc_avail1++; > + return (ENOBUFS); > } > > if (sc->hw.mac_type == em_82547) { > @@ -1220,12 +1220,6 @@ em_encap(struct em_softc *sc, struct mbu > } > } > > - sc->next_avail_tx_desc = i; > - if (sc->pcix_82544) > - sc->num_tx_desc_avail -= txd_used; > - else > - sc->num_tx_desc_avail -= map->dm_nsegs; > - > #if NVLAN > 0 > /* Find out if we are in VLAN mode */ > if (m_head->m_flags & M_VLANTAG) { > @@ -1259,6 +1253,14 @@ em_encap(struct em_softc *sc, struct mbu > tx_buffer = &sc->tx_buffer_area[first]; > tx_buffer->next_eop = last; > > + membar_producer(); > + > + sc->next_avail_tx_desc = i; > + if (sc->pcix_82544) > + atomic_sub_int(&sc->num_tx_desc_avail, txd_used); > + else > + atomic_sub_int(&sc->num_tx_desc_avail, map->dm_nsegs); > + > /* > * Advance the Transmit Descriptor Tail (Tdt), > * this tells the E1000 that this frame is > @@ -2389,10 +2391,12 @@ em_transmit_checksum_setup(struct em_sof > tx_buffer->m_head = NULL; > tx_buffer->next_eop = -1; > > + membar_producer(); > + > if (++curr_txd == sc->num_tx_desc) > curr_txd = 0; > > - sc->num_tx_desc_avail--; > + atomic_dec_int(&sc->num_tx_desc_avail); > sc->next_avail_tx_desc = curr_txd; > } > > @@ -2406,7 +2410,7 @@ em_transmit_checksum_setup(struct em_sof > void > em_txeof(struct em_softc *sc) > { > - int first, last, done, num_avail; > + int first, last, done, num_avail, free = 0; > struct em_buffer *tx_buffer; > struct em_tx_desc *tx_desc, *eop_desc; > struct ifnet *ifp = &sc->interface_data.ac_if; > @@ -2414,9 +2418,8 @@ em_txeof(struct em_softc *sc) > if (sc->num_tx_desc_avail == sc->num_tx_desc) > return; > > - KERNEL_LOCK(); > + membar_consumer(); > > - num_avail = sc->num_tx_desc_avail; > first = sc->next_tx_to_clean; > tx_desc = &sc->tx_desc_base[first]; > tx_buffer = &sc->tx_buffer_area[first]; > @@ -2440,7 +2443,7 @@ em_txeof(struct em_softc *sc) > while (first != done) { > tx_desc->upper.data = 0; > tx_desc->lower.data = 0; > - num_avail++; > + free++; > > if (tx_buffer->m_head != NULL) { > ifp->if_opackets++; > @@ -2480,25 +2483,22 @@ em_txeof(struct em_softc *sc) > > sc->next_tx_to_clean = first; > > - /* > - * If we have enough room, clear IFF_OACTIVE to tell the stack > - * that it is OK to send packets. > - * If there are no pending descriptors, clear the timeout. Otherwise, > - * if some descriptors have been freed, restart the timeout. > - */ > - if (num_avail > EM_TX_CLEANUP_THRESHOLD) > - ifq_clr_oactive(&ifp->if_snd); > + num_avail = atomic_add_int_nv(&sc->num_tx_desc_avail, free); > > /* All clean, turn off the timer */ > if (num_avail == sc->num_tx_desc) > ifp->if_timer = 0; > - /* Some cleaned, reset the timer */ > - else if (num_avail != sc->num_tx_desc_avail) > - ifp->if_timer = EM_TX_TIMEOUT; > > - sc->num_tx_desc_avail = num_avail; > - > - KERNEL_UNLOCK(); > + /* > + * If we have enough room, clear IFF_OACTIVE to tell the stack > + * that it is OK to send packets. > + */ > + if (ifq_is_oactive(&ifp->if_snd) && num_avail > EM_TX_OP_THRESHOLD) { > + ifq_clr_oactive(&ifp->if_snd); > + KERNEL_LOCK(); > + em_start(ifp); > + KERNEL_UNLOCK(); > + } > } > > /********************************************************************* > Index: pci/if_em.h > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_em.h,v > retrieving revision 1.61 > diff -u -p -r1.61 if_em.h > --- pci/if_em.h 24 Nov 2015 17:11:39 -0000 1.61 > +++ pci/if_em.h 3 Dec 2015 22:14:43 -0000 > @@ -49,6 +49,7 @@ POSSIBILITY OF SUCH DAMAGE. > #include <sys/device.h> > #include <sys/socket.h> > #include <sys/timeout.h> > +#include <sys/atomic.h> > > #include <net/if.h> > #include <net/if_media.h> > @@ -181,10 +182,9 @@ typedef int boolean_t; > #define EM_TX_TIMEOUT 5 /* set to 5 seconds */ > > /* > - * These parameters control when the driver calls the routine to reclaim > - * transmit descriptors. > + * Thise parameter controls the minimum number of available transmit > + * descriptors needed before we attempt transmission of a packet. > */ > -#define EM_TX_CLEANUP_THRESHOLD (sc->num_tx_desc / 8) > #define EM_TX_OP_THRESHOLD (sc->num_tx_desc / 32) > > /* > @@ -350,8 +350,8 @@ struct em_softc { > struct em_tx_desc *tx_desc_base; > u_int32_t next_avail_tx_desc; > u_int32_t next_tx_to_clean; > - volatile u_int16_t num_tx_desc_avail; > - u_int16_t num_tx_desc; > + volatile u_int32_t num_tx_desc_avail; > + u_int32_t num_tx_desc; > u_int32_t txd_cmd; > struct em_buffer *tx_buffer_area; > bus_dma_tag_t txtag; /* dma tag for tx */ >