Module Name: src Committed By: msaitoh Date: Tue May 8 09:45:54 UTC 2018
Modified Files: src/sys/dev/pci/ixgbe: ix_txrx.c ixgbe.c ixgbe.h ixv.c Log Message: - Fix broken watchdog timer. This change detects TX device timeout correctly. NOTE: It's supporsed not to be called {ixgbe,ixv}_rearm_queues() in the timer. Those are not required if any chip have no bug. In reality, ixgbe_rearm_queues() is required on 82599 and newer chip AND other than queue 0 to prevent device timeout. When it occured, packet was sent but the descriptor's DD bit wasn't set even though IXGBE_TXD_CMD_EOP and IXGBE_TXD_CMD_RS were set. After forcing interrupt by writing EICS register in ixgbe_rearm_queues(), DD is set. Why? Is this an undocumented errata? It might be possible not call rearm_queues on 82598 or queue 0, we call in any cases in case the problem occurs. On ixv(4), I have not seen this problem yet (though I tested only on X550_X(Xeon D 12xx)'s virtual function), but we do rearm in case TX device timeout happen. - ixv(4): Call callout_stop() earlier in ixv_stop() like ixgbe_stop(). - KNF. To generate a diff of this commit: cvs rdiff -u -r1.41 -r1.42 src/sys/dev/pci/ixgbe/ix_txrx.c cvs rdiff -u -r1.149 -r1.150 src/sys/dev/pci/ixgbe/ixgbe.c cvs rdiff -u -r1.46 -r1.47 src/sys/dev/pci/ixgbe/ixgbe.h cvs rdiff -u -r1.95 -r1.96 src/sys/dev/pci/ixgbe/ixv.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/pci/ixgbe/ix_txrx.c diff -u src/sys/dev/pci/ixgbe/ix_txrx.c:1.41 src/sys/dev/pci/ixgbe/ix_txrx.c:1.42 --- src/sys/dev/pci/ixgbe/ix_txrx.c:1.41 Wed Apr 25 08:46:19 2018 +++ src/sys/dev/pci/ixgbe/ix_txrx.c Tue May 8 09:45:54 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: ix_txrx.c,v 1.41 2018/04/25 08:46:19 msaitoh Exp $ */ +/* $NetBSD: ix_txrx.c,v 1.42 2018/05/08 09:45:54 msaitoh Exp $ */ /****************************************************************************** @@ -130,9 +130,10 @@ static void ixgbe_setup_hw_rsc(struct rx int ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr) { - int rc; struct mbuf *m_head; struct adapter *adapter = txr->adapter; + int enqueued = 0; + int rc; IXGBE_TX_LOCK_ASSERT(txr); @@ -158,6 +159,7 @@ ixgbe_legacy_start_locked(struct ifnet * if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) { break; } + enqueued++; IFQ_DEQUEUE(&ifp->if_snd, m_head); if (rc != 0) { m_freem(m_head); @@ -167,6 +169,10 @@ ixgbe_legacy_start_locked(struct ifnet * /* Send a copy of the frame to the BPF listener */ bpf_mtap(ifp, m_head); } + if (enqueued) { + txr->lastsent = time_uptime; + txr->sending = true; + } return IXGBE_SUCCESS; } /* ixgbe_legacy_start_locked */ @@ -313,6 +319,11 @@ ixgbe_mq_start_locked(struct ifnet *ifp, break; } + if (enqueued) { + txr->lastsent = time_uptime; + txr->sending = true; + } + if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter)) ixgbe_txeof(txr); @@ -537,10 +548,6 @@ retry: if (m_head->m_flags & M_MCAST) ifp->if_omcasts++; - /* Mark queue as having work */ - if (txr->busy == 0) - txr->busy = 1; - return (0); } /* ixgbe_xmit */ @@ -666,6 +673,7 @@ ixgbe_setup_transmit_ring(struct tx_ring /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (int i = 0; i < txr->num_desc; i++, txbuf++) { + txr->sending = false; if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map, 0, txbuf->m_head->m_pkthdr.len, @@ -1126,7 +1134,7 @@ ixgbe_txeof(struct tx_ring *txr) #endif /* DEV_NETMAP */ if (txr->tx_avail == txr->num_desc) { - txr->busy = 0; + txr->sending = false; return false; } @@ -1208,26 +1216,6 @@ ixgbe_txeof(struct tx_ring *txr) work += txr->num_desc; txr->next_to_clean = work; - /* - * Queue Hang detection, we know there's - * work outstanding or the first return - * would have been taken, so increment busy - * if nothing managed to get cleaned, then - * in local_timer it will be checked and - * marked as HUNG if it exceeds a MAX attempt. - */ - if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG)) - ++txr->busy; - /* - * If anything gets cleaned we reset state to 1, - * note this will turn off HUNG if its set. - */ - if (processed) - txr->busy = 1; - - if (txr->tx_avail == txr->num_desc) - txr->busy = 0; - return ((limit > 0) ? false : true); } /* ixgbe_txeof */ Index: src/sys/dev/pci/ixgbe/ixgbe.c diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.149 src/sys/dev/pci/ixgbe/ixgbe.c:1.150 --- src/sys/dev/pci/ixgbe/ixgbe.c:1.149 Thu Apr 19 07:40:12 2018 +++ src/sys/dev/pci/ixgbe/ixgbe.c Tue May 8 09:45:54 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: ixgbe.c,v 1.149 2018/04/19 07:40:12 msaitoh Exp $ */ +/* $NetBSD: ixgbe.c,v 1.150 2018/05/08 09:45:54 msaitoh Exp $ */ /****************************************************************************** @@ -184,6 +184,8 @@ static void ixgbe_free_pciintr_resources static void ixgbe_free_pci_resources(struct adapter *); static void ixgbe_local_timer(void *); static void ixgbe_local_timer1(void *); +static void ixgbe_watchdog(struct ifnet *); +static bool ixgbe_watchdog_txq(struct ifnet *, struct tx_ring *, bool *); static int ixgbe_setup_interface(device_t, struct adapter *); static void ixgbe_config_gpie(struct adapter *); static void ixgbe_config_dmac(struct adapter *); @@ -4257,11 +4259,8 @@ static void ixgbe_local_timer1(void *arg) { struct adapter *adapter = arg; - device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; - u64 queues = 0; u64 v0, v1, v2, v3, v4, v5, v6, v7; - int hung = 0; int i; KASSERT(mutex_owned(&adapter->core_mtx)); @@ -4298,63 +4297,93 @@ ixgbe_local_timer1(void *arg) adapter->enomem_tx_dma_setup.ev_count = v6; adapter->tso_err.ev_count = v7; - /* - * Check the TX queues status - * - mark hung queues so we don't schedule on them - * - watchdog only if all queues show hung - */ - que = adapter->queues; - for (i = 0; i < adapter->num_queues; i++, que++) { - /* Keep track of queues with work for soft irq */ - if (que->txr->busy) - queues |= ((u64)1 << que->me); - /* - * Each time txeof runs without cleaning, but there - * are uncleaned descriptors it increments busy. If - * we get to the MAX we declare it hung. - */ - if (que->busy == IXGBE_QUEUE_HUNG) { - ++hung; - /* Mark the queue as inactive */ - adapter->active_queues &= ~((u64)1 << que->me); - continue; - } else { - /* Check if we've come back from hung */ - if ((adapter->active_queues & ((u64)1 << que->me)) == 0) - adapter->active_queues |= ((u64)1 << que->me); - } - if (que->busy >= IXGBE_MAX_TX_BUSY) { - device_printf(dev, - "Warning queue %d appears to be hung!\n", i); - que->txr->busy = IXGBE_QUEUE_HUNG; - ++hung; - } + ixgbe_watchdog(adapter->ifp); + +out: + callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); +} /* ixgbe_local_timer */ + +static void +ixgbe_watchdog(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct ix_queue *que; + struct tx_ring *txr; + u64 queues = 0; + bool hung = false; + bool sending = false; + int i; + + txr = adapter->tx_rings; + for (i = 0; i < adapter->num_queues; i++, txr++) { + hung = ixgbe_watchdog_txq(ifp, txr, &sending); + if (hung) + break; + else if (sending) + queues |= ((u64)1 << txr->me); } - /* Only truely watchdog if all queues show hung */ - if (hung == adapter->num_queues) - goto watchdog; - else if (queues != 0) { /* Force an IRQ on queues with work */ + if (hung) { + ifp->if_flags &= ~IFF_RUNNING; + ifp->if_oerrors++; + adapter->watchdog_events.ev_count++; + ixgbe_init_locked(adapter); + } else if (queues != 0) { + /* + * Force an IRQ on queues with work. + * + * It's supporsed not to be called ixgbe_rearm_queues() if + * any chips have no bug. In reality, ixgbe_rearm_queues() is + * required on 82599 and newer chip AND other than queue 0 to + * prevent device timeout. When it occured, packet was sent but + * the descriptor's DD bot wasn't set even though + * IXGBE_TXD_CMD_EOP and IXGBE_TXD_CMD_RS were set. After + * forcing interrupt by writing EICS register in + * ixgbe_rearm_queues(), DD is set. Why? Is this an + * undocumented errata? It might be possible not call + * rearm_queues on 82598 or queue 0, we call in any cases in + * case the problem occurs. + */ que = adapter->queues; for (i = 0; i < adapter->num_queues; i++, que++) { + u64 index = queues & ((u64)1 << i); + mutex_enter(&que->dc_mtx); - if (que->disabled_count == 0) - ixgbe_rearm_queues(adapter, - queues & ((u64)1 << i)); + if ((index != 0) && (que->disabled_count == 0)) + ixgbe_rearm_queues(adapter, index); mutex_exit(&que->dc_mtx); } } +} -out: - callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); - return; +static bool +ixgbe_watchdog_txq(struct ifnet *ifp, struct tx_ring *txr, bool *sending) +{ + struct adapter *adapter = ifp->if_softc; + device_t dev = adapter->dev; + bool hung = false; + bool more = false; -watchdog: - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - adapter->ifp->if_flags &= ~IFF_RUNNING; - adapter->watchdog_events.ev_count++; - ixgbe_init_locked(adapter); -} /* ixgbe_local_timer */ + IXGBE_TX_LOCK(txr); + *sending = txr->sending; + if (*sending && ((time_uptime - txr->lastsent) > IXGBE_TX_TIMEOUT)) { + /* + * Since we're using delayed interrupts, sweep up before we + * report an error. + */ + do { + more = ixgbe_txeof(txr); + } while (more); + hung = true; + device_printf(dev, + "Watchdog timeout (queue %d%s)-- resetting\n", txr->me, + (txr->tx_avail == txr->num_desc) + ? ", lost interrupt?" : ""); + } + IXGBE_TX_UNLOCK(txr); + + return hung; +} /************************************************************************ * ixgbe_sfp_probe @@ -4514,6 +4543,8 @@ ixgbe_stop(void *arg) struct ifnet *ifp; struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; + struct tx_ring *txr; + int i; ifp = adapter->ifp; @@ -4523,6 +4554,13 @@ ixgbe_stop(void *arg) ixgbe_disable_intr(adapter); callout_stop(&adapter->timer); + txr = adapter->tx_rings; + for (i = 0; i < adapter->num_queues; i++, txr++) { + IXGBE_TX_LOCK(txr); + txr->sending = false; + IXGBE_TX_UNLOCK(txr); + } + /* Let the stack know...*/ ifp->if_flags &= ~IFF_RUNNING; @@ -6116,8 +6154,8 @@ static int ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa) { device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - struct tx_ring *txr = adapter->tx_rings; + struct ix_queue *que = adapter->queues; + struct tx_ring *txr = adapter->tx_rings; pci_chipset_tag_t pc; char intrbuf[PCI_INTRSTR_LEN]; char intr_xname[32]; @@ -6457,7 +6495,7 @@ ixgbe_handle_link(void *context) /************************************************************************ * ixgbe_rearm_queues ************************************************************************/ -static void +static inline void ixgbe_rearm_queues(struct adapter *adapter, u64 queues) { u32 mask; Index: src/sys/dev/pci/ixgbe/ixgbe.h diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.46 src/sys/dev/pci/ixgbe/ixgbe.h:1.47 --- src/sys/dev/pci/ixgbe/ixgbe.h:1.46 Wed Apr 25 08:46:19 2018 +++ src/sys/dev/pci/ixgbe/ixgbe.h Tue May 8 09:45:54 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: ixgbe.h,v 1.46 2018/04/25 08:46:19 msaitoh Exp $ */ +/* $NetBSD: ixgbe.h,v 1.47 2018/05/08 09:45:54 msaitoh Exp $ */ /****************************************************************************** SPDX-License-Identifier: BSD-3-Clause @@ -154,7 +154,7 @@ * pass between any two TX clean operations, such only happening * when the TX hardware is functioning. */ -#define IXGBE_WATCHDOG (10 * hz) +#define IXGBE_TX_TIMEOUT 10 /* * This parameters control when the driver calls the routine to reclaim @@ -219,8 +219,6 @@ #define IXGBE_VFTA_SIZE 128 #define IXGBE_BR_SIZE 4096 #define IXGBE_QUEUE_MIN_FREE 32 -#define IXGBE_MAX_TX_BUSY 10 -#define IXGBE_QUEUE_HUNG 0x80000000 #define IXGBE_EITR_DEFAULT 128 @@ -323,7 +321,6 @@ struct ix_queue { u32 eitr_setting; u32 me; struct resource *res; - int busy; struct tx_ring *txr; struct rx_ring *rxr; struct work wq_cookie; @@ -353,7 +350,8 @@ struct tx_ring { kmutex_t tx_mtx; u32 me; u32 tail; - int busy; + bool sending; /* enable/disable watchdog */ + time_t lastsent; union ixgbe_adv_tx_desc *tx_base; struct ixgbe_tx_buf *tx_buffers; struct ixgbe_dma_alloc txdma; Index: src/sys/dev/pci/ixgbe/ixv.c diff -u src/sys/dev/pci/ixgbe/ixv.c:1.95 src/sys/dev/pci/ixgbe/ixv.c:1.96 --- src/sys/dev/pci/ixgbe/ixv.c:1.95 Thu Apr 19 07:40:12 2018 +++ src/sys/dev/pci/ixgbe/ixv.c Tue May 8 09:45:54 2018 @@ -1,4 +1,4 @@ -/*$NetBSD: ixv.c,v 1.95 2018/04/19 07:40:12 msaitoh Exp $*/ +/*$NetBSD: ixv.c,v 1.96 2018/05/08 09:45:54 msaitoh Exp $*/ /****************************************************************************** @@ -101,6 +101,8 @@ static int ixv_configure_interrupts static void ixv_free_pci_resources(struct adapter *); static void ixv_local_timer(void *); static void ixv_local_timer_locked(void *); +static void ixv_watchdog(struct ifnet *); +static bool ixv_watchdog_txq(struct ifnet *, struct tx_ring *, bool *); static int ixv_setup_interface(device_t, struct adapter *); static int ixv_negotiate_api(struct adapter *); @@ -1189,11 +1191,8 @@ static void ixv_local_timer_locked(void *arg) { struct adapter *adapter = arg; - device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; - u64 queues = 0; u64 v0, v1, v2, v3, v4, v5, v6, v7; - int hung = 0; int i; KASSERT(mutex_owned(&adapter->core_mtx)); @@ -1227,57 +1226,85 @@ ixv_local_timer_locked(void *arg) adapter->enomem_tx_dma_setup.ev_count = v6; adapter->tso_err.ev_count = v7; - /* - * Check the TX queues status - * - mark hung queues so we don't schedule on them - * - watchdog only if all queues show hung - */ - que = adapter->queues; - for (i = 0; i < adapter->num_queues; i++, que++) { - /* Keep track of queues with work for soft irq */ - if (que->txr->busy) - queues |= ((u64)1 << que->me); + ixv_watchdog(adapter->ifp); + + callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); + + return; +} /* ixv_local_timer */ + +static void +ixv_watchdog(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct ix_queue *que; + struct tx_ring *txr; + u64 queues = 0; + bool hung = false; + bool sending = false; + int i; + + txr = adapter->tx_rings; + for (i = 0; i < adapter->num_queues; i++, txr++) { + hung = ixv_watchdog_txq(ifp, txr, &sending); + if (hung) + break; + else if (sending) + queues |= ((u64)1 << txr->me); + } + + if (hung) { + ifp->if_flags &= ~IFF_RUNNING; + ifp->if_oerrors++; + adapter->watchdog_events.ev_count++; + ixv_init_locked(adapter); + } else if (queues != 0) { /* - * Each time txeof runs without cleaning, but there - * are uncleaned descriptors it increments busy. If - * we get to the MAX we declare it hung. + * Force an IRQ on queues with work + * + * Calling ixv_rearm_queues() might not required. I've never + * seen any device timeout on ixv(4). */ - if (que->busy == IXGBE_QUEUE_HUNG) { - ++hung; - /* Mark the queue as inactive */ - adapter->active_queues &= ~((u64)1 << que->me); - continue; - } else { - /* Check if we've come back from hung */ - if ((adapter->active_queues & ((u64)1 << que->me)) == 0) - adapter->active_queues |= ((u64)1 << que->me); + que = adapter->queues; + for (i = 0; i < adapter->num_queues; i++, que++) { + u64 index = queues & ((u64)1 << i); + + mutex_enter(&que->dc_mtx); + if ((index != 0) && (que->disabled_count == 0)) + ixv_rearm_queues(adapter, index); + mutex_exit(&que->dc_mtx); } - if (que->busy >= IXGBE_MAX_TX_BUSY) { - device_printf(dev, - "Warning queue %d appears to be hung!\n", i); - que->txr->busy = IXGBE_QUEUE_HUNG; - ++hung; - } - } - - /* Only truly watchdog if all queues show hung */ - if (hung == adapter->num_queues) - goto watchdog; - else if (queues != 0) { /* Force an IRQ on queues with work */ - ixv_rearm_queues(adapter, queues); } +} - callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); - - return; +static bool +ixv_watchdog_txq(struct ifnet *ifp, struct tx_ring *txr, bool *sending) +{ + struct adapter *adapter = ifp->if_softc; + device_t dev = adapter->dev; + bool hung = false; + bool more = false; -watchdog: + IXGBE_TX_LOCK(txr); + *sending = txr->sending; + if (*sending && ((time_uptime - txr->lastsent) > IXGBE_TX_TIMEOUT)) { + /* + * Since we're using delayed interrupts, sweep up before we + * report an error. + */ + do { + more = ixgbe_txeof(txr); + } while (more); + hung = true; + device_printf(dev, + "Watchdog timeout (queue %d%s)-- resetting\n", txr->me, + (txr->tx_avail == txr->num_desc) + ? ", lost interrupt?" : ""); + } + IXGBE_TX_UNLOCK(txr); - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - adapter->ifp->if_flags &= ~IFF_RUNNING; - adapter->watchdog_events.ev_count++; - ixv_init_locked(adapter); -} /* ixv_local_timer */ + return hung; +} /************************************************************************ * ixv_update_link_status - Update OS on link state @@ -1361,6 +1388,8 @@ ixv_stop(void *arg) struct ifnet *ifp; struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; + struct tx_ring *txr; + int i; ifp = adapter->ifp; @@ -1368,6 +1397,14 @@ ixv_stop(void *arg) INIT_DEBUGOUT("ixv_stop: begin\n"); ixv_disable_intr(adapter); + callout_stop(&adapter->timer); + + txr = adapter->tx_rings; + for (i = 0; i < adapter->num_queues; i++, txr++) { + IXGBE_TX_LOCK(txr); + txr->sending = false; + IXGBE_TX_UNLOCK(txr); + } /* Tell the stack that the interface is no longer active */ ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE); @@ -1375,7 +1412,6 @@ ixv_stop(void *arg) hw->mac.ops.reset_hw(hw); adapter->hw.adapter_stopped = FALSE; hw->mac.ops.stop_adapter(hw); - callout_stop(&adapter->timer); /* reprogram the RAR[0] in case user changed it. */ hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); @@ -2877,7 +2913,7 @@ ixv_allocate_msix(struct adapter *adapte { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; - struct tx_ring *txr = adapter->tx_rings; + struct tx_ring *txr = adapter->tx_rings; int error, msix_ctrl, rid, vector = 0; pci_chipset_tag_t pc; pcitag_t tag;