So Mark and I spent some time figuring out what the issue was with ix(4).
Based on that info I resurrected the em(4) mpsafe diff that got backed out
and applied the same fix there. It is somewhat unclear whether this fixes the
watchdog timeouts, since in theory the watchdog timer should be stopped when
hitting the race condition we hit in ix(4).
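
For clarity, the race is a classic lost wakeup: em_start() runs out of
descriptors and marks the queue oactive, but the interrupt handler may have
reclaimed the whole ring in between, so nothing ever clears the flag again
and the queue stalls. Re-checking the ring right after setting the flag
closes that window. Below is a minimal userland sketch of the pattern using
C11 atomics instead of the kernel primitives; the names (ring_avail,
ring_size, oactive, tx_ring_full, tx_ring_cleaned) are made up for
illustration only and just stand in for num_tx_desc_avail, num_tx_desc and
the ifq oactive bit:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint ring_avail;			/* free descriptors */
static const unsigned int ring_size = 256;	/* total descriptors */
static atomic_bool oactive;			/* queue marked full */

/* transmit path: encap failed, no descriptors left for this packet */
void
tx_ring_full(void)
{
	atomic_store(&oactive, true);

	/*
	 * The cleanup path may have emptied the ring between our failed
	 * allocation and the store above, in which case it never saw
	 * oactive set.  Re-check and clear the flag ourselves so
	 * transmission can be retried.
	 */
	if (atomic_load(&ring_avail) == ring_size)
		atomic_store(&oactive, false);
}

/* interrupt/cleanup path: descriptors were reclaimed */
void
tx_ring_cleaned(unsigned int freed)
{
	unsigned int avail;

	/* add the freed count and fetch the new value, like add_int_nv */
	avail = atomic_fetch_add(&ring_avail, freed) + freed;
	if (avail > ring_size / 32 && atomic_load(&oactive))
		atomic_store(&oactive, false);	/* restart transmit */
}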

I'm currently hammering my test system with this and so far I have not
seen a watchdog fire.
-- 
:wq Claudio

Index: pci/if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.313
diff -u -p -r1.313 if_em.c
--- pci/if_em.c 25 Nov 2015 03:09:59 -0000      1.313
+++ pci/if_em.c 3 Dec 2015 22:08:54 -0000
@@ -612,6 +612,16 @@ em_start(struct ifnet *ifp)
                if (em_encap(sc, m_head)) {
                        ifq_deq_rollback(&ifp->if_snd, m_head);
                        ifq_set_oactive(&ifp->if_snd);
+                       /*
+                        *  Make sure there are still packets on the
+                        *  ring.  The interrupt handler may have
+                        *  cleaned up the ring before we were able to
+                        *  set the IF_OACTIVE flag.
+                        */
+                       if (sc->num_tx_desc_avail == sc->num_tx_desc) {
+                               ifq_clr_oactive(&ifp->if_snd);
+                               continue;
+                       }
                        break;
                }
 
@@ -918,20 +928,17 @@ em_intr(void *arg)
        if (reg_icr & E1000_ICR_RXO)
                sc->rx_overruns++;
 
-       KERNEL_LOCK();
-
        /* Link status change */
        if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+               KERNEL_LOCK();
                sc->hw.get_link_status = 1;
                em_check_for_link(&sc->hw);
                em_update_link_status(sc);
+               if (!IFQ_IS_EMPTY(&ifp->if_snd))
+                       em_start(ifp);
+               KERNEL_UNLOCK();
        }
 
-       if (ifp->if_flags & IFF_RUNNING && !IFQ_IS_EMPTY(&ifp->if_snd))
-               em_start(ifp);
-
-       KERNEL_UNLOCK();
-
        if (refill && em_rxfill(sc)) {
                /* Advance the Rx Queue #0 "Tail Pointer". */
                E1000_WRITE_REG(&sc->hw, RDT, sc->last_rx_desc_filled);
@@ -1108,17 +1115,10 @@ em_encap(struct em_softc *sc, struct mbu
        struct em_buffer   *tx_buffer, *tx_buffer_mapped;
        struct em_tx_desc *current_tx_desc = NULL;
 
-       /*
-        * Force a cleanup if number of TX descriptors
-        * available hits the threshold
-        */
-       if (sc->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
-               em_txeof(sc);
-               /* Now do we at least have a minimal? */
-               if (sc->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
-                       sc->no_tx_desc_avail1++;
-                       return (ENOBUFS);
-               }
+       /* Check that we have at least the minimal number of TX descriptors. */
+       if (sc->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
+               sc->no_tx_desc_avail1++;
+               return (ENOBUFS);
        }
 
        if (sc->hw.mac_type == em_82547) {
@@ -1220,12 +1220,6 @@ em_encap(struct em_softc *sc, struct mbu
                }
        }
 
-       sc->next_avail_tx_desc = i;
-       if (sc->pcix_82544)
-               sc->num_tx_desc_avail -= txd_used;
-       else
-               sc->num_tx_desc_avail -= map->dm_nsegs;
-
 #if NVLAN > 0
        /* Find out if we are in VLAN mode */
        if (m_head->m_flags & M_VLANTAG) {
@@ -1259,6 +1253,14 @@ em_encap(struct em_softc *sc, struct mbu
        tx_buffer = &sc->tx_buffer_area[first];
        tx_buffer->next_eop = last;
 
+       membar_producer();
+
+       sc->next_avail_tx_desc = i;
+       if (sc->pcix_82544)
+               atomic_sub_int(&sc->num_tx_desc_avail, txd_used);
+       else
+               atomic_sub_int(&sc->num_tx_desc_avail, map->dm_nsegs);
+
        /* 
         * Advance the Transmit Descriptor Tail (Tdt),
         * this tells the E1000 that this frame is
@@ -2389,10 +2391,12 @@ em_transmit_checksum_setup(struct em_sof
        tx_buffer->m_head = NULL;
        tx_buffer->next_eop = -1;
 
+       membar_producer();
+
        if (++curr_txd == sc->num_tx_desc)
                curr_txd = 0;
 
-       sc->num_tx_desc_avail--;
+       atomic_dec_int(&sc->num_tx_desc_avail);
        sc->next_avail_tx_desc = curr_txd;
 }
 
@@ -2406,7 +2410,7 @@ em_transmit_checksum_setup(struct em_sof
 void
 em_txeof(struct em_softc *sc)
 {
-       int first, last, done, num_avail;
+       int first, last, done, num_avail, free = 0;
        struct em_buffer *tx_buffer;
        struct em_tx_desc   *tx_desc, *eop_desc;
        struct ifnet   *ifp = &sc->interface_data.ac_if;
@@ -2414,9 +2418,8 @@ em_txeof(struct em_softc *sc)
        if (sc->num_tx_desc_avail == sc->num_tx_desc)
                return;
 
-       KERNEL_LOCK();
+       membar_consumer();
 
-       num_avail = sc->num_tx_desc_avail;
        first = sc->next_tx_to_clean;
        tx_desc = &sc->tx_desc_base[first];
        tx_buffer = &sc->tx_buffer_area[first];
@@ -2440,7 +2443,7 @@ em_txeof(struct em_softc *sc)
                while (first != done) {
                        tx_desc->upper.data = 0;
                        tx_desc->lower.data = 0;
-                       num_avail++;
+                       free++;
 
                        if (tx_buffer->m_head != NULL) {
                                ifp->if_opackets++;
@@ -2480,25 +2483,22 @@ em_txeof(struct em_softc *sc)
 
        sc->next_tx_to_clean = first;
 
-       /*
-        * If we have enough room, clear IFF_OACTIVE to tell the stack
-        * that it is OK to send packets.
-        * If there are no pending descriptors, clear the timeout. Otherwise,
-        * if some descriptors have been freed, restart the timeout.
-        */
-       if (num_avail > EM_TX_CLEANUP_THRESHOLD)
-               ifq_clr_oactive(&ifp->if_snd);
+       num_avail = atomic_add_int_nv(&sc->num_tx_desc_avail, free);
 
        /* All clean, turn off the timer */
        if (num_avail == sc->num_tx_desc)
                ifp->if_timer = 0;
-       /* Some cleaned, reset the timer */
-       else if (num_avail != sc->num_tx_desc_avail)
-               ifp->if_timer = EM_TX_TIMEOUT;
 
-       sc->num_tx_desc_avail = num_avail;
-
-       KERNEL_UNLOCK();
+       /*
+        * If we have enough room, clear IFF_OACTIVE to tell the stack
+        * that it is OK to send packets.
+        */
+       if (ifq_is_oactive(&ifp->if_snd) && num_avail > EM_TX_OP_THRESHOLD) {
+               ifq_clr_oactive(&ifp->if_snd);
+               KERNEL_LOCK();
+               em_start(ifp);
+               KERNEL_UNLOCK();
+       }
 }
 
 /*********************************************************************
Index: pci/if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
retrieving revision 1.61
diff -u -p -r1.61 if_em.h
--- pci/if_em.h 24 Nov 2015 17:11:39 -0000      1.61
+++ pci/if_em.h 3 Dec 2015 22:14:43 -0000
@@ -49,6 +49,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <sys/device.h>
 #include <sys/socket.h>
 #include <sys/timeout.h>
+#include <sys/atomic.h>
 
 #include <net/if.h>
 #include <net/if_media.h>
@@ -181,10 +182,9 @@ typedef int        boolean_t;
 #define EM_TX_TIMEOUT                  5       /* set to 5 seconds */
 
 /*
- * These parameters control when the driver calls the routine to reclaim
- * transmit descriptors.
+ * This parameter controls the minimum number of available transmit
+ * descriptors needed before we attempt transmission of a packet.
  */
-#define EM_TX_CLEANUP_THRESHOLD                (sc->num_tx_desc / 8)
 #define EM_TX_OP_THRESHOLD             (sc->num_tx_desc / 32)
 
 /*
@@ -350,8 +350,8 @@ struct em_softc {
        struct em_tx_desc       *tx_desc_base;
        u_int32_t               next_avail_tx_desc;
        u_int32_t               next_tx_to_clean;
-       volatile u_int16_t      num_tx_desc_avail;
-       u_int16_t               num_tx_desc;
+       volatile u_int32_t      num_tx_desc_avail;
+       u_int32_t               num_tx_desc;
        u_int32_t               txd_cmd;
        struct em_buffer        *tx_buffer_area;
        bus_dma_tag_t           txtag;          /* dma tag for tx */
