On 05/12/15(Sat) 15:41, Claudio Jeker wrote:
> So Mark and I spent some time to figure out what the issue was with ix(4)
> based on that info I resurrected the em(4) mpsafe diff that got backed out
> and I applied the same fix. It is somewhat unclear if this fixes the
> watchdog timeouts since in theory the wdog timer should be stopped when
> hitting the race condition we hit in ix(4).
> 
> I'm currently hammering my test system with this and until now I have not
> seen a watchdog fire.

It is the right time to get this in.  ok mpi@

> -- 
> :wq Claudio
> 
> Index: pci/if_em.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.c,v
> retrieving revision 1.313
> diff -u -p -r1.313 if_em.c
> --- pci/if_em.c       25 Nov 2015 03:09:59 -0000      1.313
> +++ pci/if_em.c       3 Dec 2015 22:08:54 -0000
> @@ -612,6 +612,16 @@ em_start(struct ifnet *ifp)
>               if (em_encap(sc, m_head)) {
>                       ifq_deq_rollback(&ifp->if_snd, m_head);
>                       ifq_set_oactive(&ifp->if_snd);
> +                     /*
> +                      *  Make sure there are still packets on the
> +                      *  ring.  The interrupt handler may have
> +                      *  cleaned up the ring before we were able to
> +                      *  set the IF_OACTIVE flag.
> +                      */
> +                     if (sc->num_tx_desc_avail == sc->num_tx_desc) {
> +                             ifq_clr_oactive(&ifp->if_snd);
> +                             continue;
> +                     }
>                       break;
>               }
>  
> @@ -918,20 +928,17 @@ em_intr(void *arg)
>       if (reg_icr & E1000_ICR_RXO)
>               sc->rx_overruns++;
>  
> -     KERNEL_LOCK();
> -
>       /* Link status change */
>       if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
> +             KERNEL_LOCK();
>               sc->hw.get_link_status = 1;
>               em_check_for_link(&sc->hw);
>               em_update_link_status(sc);
> +             if (!IFQ_IS_EMPTY(&ifp->if_snd))
> +                     em_start(ifp);
> +             KERNEL_UNLOCK();
>       }
>  
> -     if (ifp->if_flags & IFF_RUNNING && !IFQ_IS_EMPTY(&ifp->if_snd))
> -             em_start(ifp);
> -
> -     KERNEL_UNLOCK();
> -
>       if (refill && em_rxfill(sc)) {
>               /* Advance the Rx Queue #0 "Tail Pointer". */
>               E1000_WRITE_REG(&sc->hw, RDT, sc->last_rx_desc_filled);
> @@ -1108,17 +1115,10 @@ em_encap(struct em_softc *sc, struct mbu
>       struct em_buffer   *tx_buffer, *tx_buffer_mapped;
>       struct em_tx_desc *current_tx_desc = NULL;
>  
> -     /*
> -      * Force a cleanup if number of TX descriptors
> -      * available hits the threshold
> -      */
> -     if (sc->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
> -             em_txeof(sc);
> -             /* Now do we at least have a minimal? */
> -             if (sc->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
> -                     sc->no_tx_desc_avail1++;
> -                     return (ENOBUFS);
> -             }
> +     /* Check that we have at least the minimal number of TX descriptors. */
> +     if (sc->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
> +             sc->no_tx_desc_avail1++;
> +             return (ENOBUFS);
>       }
>  
>       if (sc->hw.mac_type == em_82547) {
> @@ -1220,12 +1220,6 @@ em_encap(struct em_softc *sc, struct mbu
>               }
>       }
>  
> -     sc->next_avail_tx_desc = i;
> -     if (sc->pcix_82544)
> -             sc->num_tx_desc_avail -= txd_used;
> -     else
> -             sc->num_tx_desc_avail -= map->dm_nsegs;
> -
>  #if NVLAN > 0
>       /* Find out if we are in VLAN mode */
>       if (m_head->m_flags & M_VLANTAG) {
> @@ -1259,6 +1253,14 @@ em_encap(struct em_softc *sc, struct mbu
>       tx_buffer = &sc->tx_buffer_area[first];
>       tx_buffer->next_eop = last;
>  
> +     membar_producer();
> +
> +     sc->next_avail_tx_desc = i;
> +     if (sc->pcix_82544)
> +             atomic_sub_int(&sc->num_tx_desc_avail, txd_used);
> +     else
> +             atomic_sub_int(&sc->num_tx_desc_avail, map->dm_nsegs);
> +
>       /* 
>        * Advance the Transmit Descriptor Tail (Tdt),
>        * this tells the E1000 that this frame is
> @@ -2389,10 +2391,12 @@ em_transmit_checksum_setup(struct em_sof
>       tx_buffer->m_head = NULL;
>       tx_buffer->next_eop = -1;
>  
> +     membar_producer();
> +
>       if (++curr_txd == sc->num_tx_desc)
>               curr_txd = 0;
>  
> -     sc->num_tx_desc_avail--;
> +     atomic_dec_int(&sc->num_tx_desc_avail);
>       sc->next_avail_tx_desc = curr_txd;
>  }
>  
> @@ -2406,7 +2410,7 @@ em_transmit_checksum_setup(struct em_sof
>  void
>  em_txeof(struct em_softc *sc)
>  {
> -     int first, last, done, num_avail;
> +     int first, last, done, num_avail, free = 0;
>       struct em_buffer *tx_buffer;
>       struct em_tx_desc   *tx_desc, *eop_desc;
>       struct ifnet   *ifp = &sc->interface_data.ac_if;
> @@ -2414,9 +2418,8 @@ em_txeof(struct em_softc *sc)
>       if (sc->num_tx_desc_avail == sc->num_tx_desc)
>               return;
>  
> -     KERNEL_LOCK();
> +     membar_consumer();
>  
> -     num_avail = sc->num_tx_desc_avail;
>       first = sc->next_tx_to_clean;
>       tx_desc = &sc->tx_desc_base[first];
>       tx_buffer = &sc->tx_buffer_area[first];
> @@ -2440,7 +2443,7 @@ em_txeof(struct em_softc *sc)
>               while (first != done) {
>                       tx_desc->upper.data = 0;
>                       tx_desc->lower.data = 0;
> -                     num_avail++;
> +                     free++;
>  
>                       if (tx_buffer->m_head != NULL) {
>                               ifp->if_opackets++;
> @@ -2480,25 +2483,22 @@ em_txeof(struct em_softc *sc)
>  
>       sc->next_tx_to_clean = first;
>  
> -     /*
> -      * If we have enough room, clear IFF_OACTIVE to tell the stack
> -      * that it is OK to send packets.
> -      * If there are no pending descriptors, clear the timeout. Otherwise,
> -      * if some descriptors have been freed, restart the timeout.
> -      */
> -     if (num_avail > EM_TX_CLEANUP_THRESHOLD)
> -             ifq_clr_oactive(&ifp->if_snd);
> +     num_avail = atomic_add_int_nv(&sc->num_tx_desc_avail, free);
>  
>       /* All clean, turn off the timer */
>       if (num_avail == sc->num_tx_desc)
>               ifp->if_timer = 0;
> -     /* Some cleaned, reset the timer */
> -     else if (num_avail != sc->num_tx_desc_avail)
> -             ifp->if_timer = EM_TX_TIMEOUT;
>  
> -     sc->num_tx_desc_avail = num_avail;
> -
> -     KERNEL_UNLOCK();
> +     /*
> +      * If we have enough room, clear IFF_OACTIVE to tell the stack
> +      * that it is OK to send packets.
> +      */
> +     if (ifq_is_oactive(&ifp->if_snd) && num_avail > EM_TX_OP_THRESHOLD) {
> +             ifq_clr_oactive(&ifp->if_snd);
> +             KERNEL_LOCK();
> +             em_start(ifp);
> +             KERNEL_UNLOCK();
> +     }
>  }
>  
>  /*********************************************************************
> Index: pci/if_em.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.h,v
> retrieving revision 1.61
> diff -u -p -r1.61 if_em.h
> --- pci/if_em.h       24 Nov 2015 17:11:39 -0000      1.61
> +++ pci/if_em.h       3 Dec 2015 22:14:43 -0000
> @@ -49,6 +49,7 @@ POSSIBILITY OF SUCH DAMAGE.
>  #include <sys/device.h>
>  #include <sys/socket.h>
>  #include <sys/timeout.h>
> +#include <sys/atomic.h>
>  
>  #include <net/if.h>
>  #include <net/if_media.h>
> @@ -181,10 +182,9 @@ typedef int      boolean_t;
>  #define EM_TX_TIMEOUT                        5       /* set to 5 seconds */
>  
>  /*
> - * These parameters control when the driver calls the routine to reclaim
> - * transmit descriptors.
> + * This parameter controls the minimum number of available transmit
> + * descriptors needed before we attempt transmission of a packet.
>   */
> -#define EM_TX_CLEANUP_THRESHOLD              (sc->num_tx_desc / 8)
>  #define EM_TX_OP_THRESHOLD           (sc->num_tx_desc / 32)
>  
>  /*
> @@ -350,8 +350,8 @@ struct em_softc {
>       struct em_tx_desc       *tx_desc_base;
>       u_int32_t               next_avail_tx_desc;
>       u_int32_t               next_tx_to_clean;
> -     volatile u_int16_t      num_tx_desc_avail;
> -     u_int16_t               num_tx_desc;
> +     volatile u_int32_t      num_tx_desc_avail;
> +     u_int32_t               num_tx_desc;
>       u_int32_t               txd_cmd;
>       struct em_buffer        *tx_buffer_area;
>       bus_dma_tag_t           txtag;          /* dma tag for tx */
> 

Reply via email to