Re: [PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
On 2014/08/28 20:24, Prashant Sreedharan wrote: > > > > - for (i = 0; i < tp->irq_max; i++) > > - tp->napi[i].tx_pending = ering->tx_pending; > > + dev->gso_max_segs = TG3_TX_SEG_PER_DESC(ering->tx_pending - 1); > > + for (i = 0; i < tp->irq_max; i++) { > > + struct tg3_napi *tnapi = >napi[i]; > > + > > + tnapi->tx_pending = ering->tx_pending; > > + if (netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)) && > > Need to limit the number of TX queues to tp->txq_cnt instead of > tp->irq_max as txq_cnt can be less than irq_max. > > netif_set_real_num_tx_queues(tp->dev, tp->txq_cnt); Thanks for your careful review, I'll resubmit shortly. > > > + tnapi->wakeup_thresh >= ering->tx_pending) > > + tnapi->wakeup_thresh = MAX_SKB_FRAGS + 1; > > + } > > > > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
On 2014/08/28 20:24, Prashant Sreedharan wrote: - for (i = 0; i tp-irq_max; i++) - tp-napi[i].tx_pending = ering-tx_pending; + dev-gso_max_segs = TG3_TX_SEG_PER_DESC(ering-tx_pending - 1); + for (i = 0; i tp-irq_max; i++) { + struct tg3_napi *tnapi = tp-napi[i]; + + tnapi-tx_pending = ering-tx_pending; + if (netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)) Need to limit the number of TX queues to tp-txq_cnt instead of tp-irq_max as txq_cnt can be less than irq_max. netif_set_real_num_tx_queues(tp-dev, tp-txq_cnt); Thanks for your careful review, I'll resubmit shortly. + tnapi-wakeup_thresh = ering-tx_pending) + tnapi-wakeup_thresh = MAX_SKB_FRAGS + 1; + } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
> > - for (i = 0; i < tp->irq_max; i++) > - tp->napi[i].tx_pending = ering->tx_pending; > + dev->gso_max_segs = TG3_TX_SEG_PER_DESC(ering->tx_pending - 1); > + for (i = 0; i < tp->irq_max; i++) { > + struct tg3_napi *tnapi = >napi[i]; > + > + tnapi->tx_pending = ering->tx_pending; > + if (netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)) && Need to limit the number of TX queues to tp->txq_cnt instead of tp->irq_max as txq_cnt can be less than irq_max. netif_set_real_num_tx_queues(tp->dev, tp->txq_cnt); > + tnapi->wakeup_thresh >= ering->tx_pending) > + tnapi->wakeup_thresh = MAX_SKB_FRAGS + 1; > + } > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
> if (netif_running(dev)) { > @@ -12346,8 +12380,15 @@ static int tg3_set_ringparam(struct net_device *dev, > struct ethtool_ringparam *e > if (tg3_flag(tp, JUMBO_RING_ENABLE)) > tp->rx_jumbo_pending = ering->rx_jumbo_pending; > > - for (i = 0; i < tp->irq_max; i++) > - tp->napi[i].tx_pending = ering->tx_pending; > + dev->gso_max_segs = TG3_TX_SEG_PER_DESC(ering->tx_pending - 1); > + for (i = 0; i < tp->irq_max; i++) { > + struct tg3_napi *tnapi = >napi[i]; > + > + tnapi->tx_pending = ering->tx_pending; > + if (netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)) && > + tnapi->wakeup_thresh >= ering->tx_pending) > + tnapi->wakeup_thresh = MAX_SKB_FRAGS + 1; To maintain consistency wakeup_thresh can be set to TG3_TX_WAKEUP_THRESH similar to other parts of the code, except the special handling of tg3_tso_bug() estimate. > + } > > if (netif_running(dev)) { > tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); > @@ -17822,6 +17863,7 @@ static int tg3_init_one(struct pci_dev *pdev, > else > sndmbx += 0xc; > } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
if (netif_running(dev)) { @@ -12346,8 +12380,15 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e if (tg3_flag(tp, JUMBO_RING_ENABLE)) tp-rx_jumbo_pending = ering-rx_jumbo_pending; - for (i = 0; i tp-irq_max; i++) - tp-napi[i].tx_pending = ering-tx_pending; + dev-gso_max_segs = TG3_TX_SEG_PER_DESC(ering-tx_pending - 1); + for (i = 0; i tp-irq_max; i++) { + struct tg3_napi *tnapi = tp-napi[i]; + + tnapi-tx_pending = ering-tx_pending; + if (netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)) + tnapi-wakeup_thresh = ering-tx_pending) + tnapi-wakeup_thresh = MAX_SKB_FRAGS + 1; To maintain consistency wakeup_thresh can be set to TG3_TX_WAKEUP_THRESH similar to other parts of the code, except the special handling of tg3_tso_bug() estimate. + } if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); @@ -17822,6 +17863,7 @@ static int tg3_init_one(struct pci_dev *pdev, else sndmbx += 0xc; } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
- for (i = 0; i tp-irq_max; i++) - tp-napi[i].tx_pending = ering-tx_pending; + dev-gso_max_segs = TG3_TX_SEG_PER_DESC(ering-tx_pending - 1); + for (i = 0; i tp-irq_max; i++) { + struct tg3_napi *tnapi = tp-napi[i]; + + tnapi-tx_pending = ering-tx_pending; + if (netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)) Need to limit the number of TX queues to tp-txq_cnt instead of tp-irq_max as txq_cnt can be less than irq_max. netif_set_real_num_tx_queues(tp-dev, tp-txq_cnt); + tnapi-wakeup_thresh = ering-tx_pending) + tnapi-wakeup_thresh = MAX_SKB_FRAGS + 1; + } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
In tg3_set_ringparam(), the tx_pending test to cover the cases where tg3_tso_bug() is entered has two problems 1) the check is only done for certain hardware whereas the workaround is now used more broadly. IOW, the check may not be performed when it is needed. 2) the check is too optimistic. For example, with a 5761 (SHORT_DMA_BUG), tg3_set_ringparam() skips over the "tx_pending <= (MAX_SKB_FRAGS * 3)" check because TSO_BUG is false. Even if it did do the check, with a full sized skb, frag_cnt_est = 135 but the check is for <= MAX_SKB_FRAGS * 3 (= 17 * 3 = 51). So the check is insufficient. This leads to the following situation: by setting, ex. tx_pending = 100, there can be an skb that triggers tg3_tso_bug() and that is large enough to cause tg3_tso_bug() to stop the queue even when it is empty. We then end up with a netdev watchdog transmit timeout. Given that 1) some of the conditions tested for in tg3_tx_frag_set() apply regardless of the chipset flags and that 2) it is difficult to estimate ahead of time the max possible number of frames that a large skb may be split into by gso, we instead take the approach of adjusting dev->gso_max_segs according to the requested tx_pending size. This puts us in the exceptional situation that a single skb that triggers tg3_tso_bug() may require the entire tx ring. Usually the tx queue is woken up when at least a quarter of it is available (TG3_TX_WAKEUP_THRESH) but that would be insufficient now. To avoid useless wakeups, the tx queue wake up threshold is made dynamic. Likewise, usually the tx queue is stopped as soon as an skb with max frags may overrun it. Since the skbs submitted from tg3_tso_bug() use a controlled number of descriptors, the tx queue stop threshold may be lowered. Signed-off-by: Benjamin Poirier --- Changes v1->v2 * in tg3_set_ringparam(), reduce gso_max_segs further to budget 3 descriptors per gso seg instead of only 1 as in v1 * in tg3_tso_bug(), check that this estimation (3 desc/seg) holds, otherwise linearize some skbs as needed * in tg3_start_xmit(), make the queue stop threshold a parameter, for the reason explained in the commit description Changes v2->v3 * use tg3_maybe_stop_txq() instead of repeatedly open coding it * add the requested tp->tx_dropped++ stat increase in tg3_tso_bug() if skb_linearize() fails and we must abort * in the same code block, add an additional check to stop the queue with the default threshold. Otherwise, the netdev_err message at the start of __tg3_start_xmit() could be triggered when the next frame is transmitted. That is because the previous calls to __tg3_start_xmit() in tg3_tso_bug() may have been using a stop_thresh=segs_remaining that is < MAX_SKB_FRAGS + 1. Changes v3->v4 * in tg3_set_ringparam(), make sure that wakeup_thresh does not end up being >= tx_pending. Identified by Prashant. I reproduced this bug using the same approach explained in patch 1. The bug reproduces with tx_pending <= 135 --- drivers/net/ethernet/broadcom/tg3.c | 70 + drivers/net/ethernet/broadcom/tg3.h | 1 + 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index f706a1e..05cb940 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -204,6 +204,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) /* minimum number of free TX descriptors required to wake up TX process */ #define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)->tx_pending / 4, \ MAX_SKB_FRAGS + 1) +/* estimate a certain number of descriptors per gso segment */ +#define TG3_TX_DESC_PER_SEG(seg_nb)((seg_nb) * 3) +#define TG3_TX_SEG_PER_DESC(desc_nb) ((desc_nb) / 3) + #define TG3_TX_BD_DMA_MAX_2K 2048 #define TG3_TX_BD_DMA_MAX_4K 4096 @@ -6609,10 +6613,10 @@ static void tg3_tx(struct tg3_napi *tnapi) smp_mb(); if (unlikely(netif_tx_queue_stopped(txq) && -(tg3_tx_avail(tnapi) > TG3_TX_WAKEUP_THRESH(tnapi { +(tg3_tx_avail(tnapi) > tnapi->wakeup_thresh))) { __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) && - (tg3_tx_avail(tnapi) > TG3_TX_WAKEUP_THRESH(tnapi))) + (tg3_tx_avail(tnapi) > tnapi->wakeup_thresh)) netif_tx_wake_queue(txq); __netif_tx_unlock(txq); } @@ -7830,6 +7834,8 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, } static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); +static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *, + u32); /* Returns true if the queue has been stopped. Note that it may have been * restarted since. @@ -7844,6 +7850,7 @@ static
[PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug
In tg3_set_ringparam(), the tx_pending test to cover the cases where tg3_tso_bug() is entered has two problems 1) the check is only done for certain hardware whereas the workaround is now used more broadly. IOW, the check may not be performed when it is needed. 2) the check is too optimistic. For example, with a 5761 (SHORT_DMA_BUG), tg3_set_ringparam() skips over the tx_pending = (MAX_SKB_FRAGS * 3) check because TSO_BUG is false. Even if it did do the check, with a full sized skb, frag_cnt_est = 135 but the check is for = MAX_SKB_FRAGS * 3 (= 17 * 3 = 51). So the check is insufficient. This leads to the following situation: by setting, ex. tx_pending = 100, there can be an skb that triggers tg3_tso_bug() and that is large enough to cause tg3_tso_bug() to stop the queue even when it is empty. We then end up with a netdev watchdog transmit timeout. Given that 1) some of the conditions tested for in tg3_tx_frag_set() apply regardless of the chipset flags and that 2) it is difficult to estimate ahead of time the max possible number of frames that a large skb may be split into by gso, we instead take the approach of adjusting dev-gso_max_segs according to the requested tx_pending size. This puts us in the exceptional situation that a single skb that triggers tg3_tso_bug() may require the entire tx ring. Usually the tx queue is woken up when at least a quarter of it is available (TG3_TX_WAKEUP_THRESH) but that would be insufficient now. To avoid useless wakeups, the tx queue wake up threshold is made dynamic. Likewise, usually the tx queue is stopped as soon as an skb with max frags may overrun it. Since the skbs submitted from tg3_tso_bug() use a controlled number of descriptors, the tx queue stop threshold may be lowered. Signed-off-by: Benjamin Poirier bpoir...@suse.de --- Changes v1-v2 * in tg3_set_ringparam(), reduce gso_max_segs further to budget 3 descriptors per gso seg instead of only 1 as in v1 * in tg3_tso_bug(), check that this estimation (3 desc/seg) holds, otherwise linearize some skbs as needed * in tg3_start_xmit(), make the queue stop threshold a parameter, for the reason explained in the commit description Changes v2-v3 * use tg3_maybe_stop_txq() instead of repeatedly open coding it * add the requested tp-tx_dropped++ stat increase in tg3_tso_bug() if skb_linearize() fails and we must abort * in the same code block, add an additional check to stop the queue with the default threshold. Otherwise, the netdev_err message at the start of __tg3_start_xmit() could be triggered when the next frame is transmitted. That is because the previous calls to __tg3_start_xmit() in tg3_tso_bug() may have been using a stop_thresh=segs_remaining that is MAX_SKB_FRAGS + 1. Changes v3-v4 * in tg3_set_ringparam(), make sure that wakeup_thresh does not end up being = tx_pending. Identified by Prashant. I reproduced this bug using the same approach explained in patch 1. The bug reproduces with tx_pending = 135 --- drivers/net/ethernet/broadcom/tg3.c | 70 + drivers/net/ethernet/broadcom/tg3.h | 1 + 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index f706a1e..05cb940 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -204,6 +204,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) /* minimum number of free TX descriptors required to wake up TX process */ #define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \ MAX_SKB_FRAGS + 1) +/* estimate a certain number of descriptors per gso segment */ +#define TG3_TX_DESC_PER_SEG(seg_nb)((seg_nb) * 3) +#define TG3_TX_SEG_PER_DESC(desc_nb) ((desc_nb) / 3) + #define TG3_TX_BD_DMA_MAX_2K 2048 #define TG3_TX_BD_DMA_MAX_4K 4096 @@ -6609,10 +6613,10 @@ static void tg3_tx(struct tg3_napi *tnapi) smp_mb(); if (unlikely(netif_tx_queue_stopped(txq) -(tg3_tx_avail(tnapi) TG3_TX_WAKEUP_THRESH(tnapi { +(tg3_tx_avail(tnapi) tnapi-wakeup_thresh))) { __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) - (tg3_tx_avail(tnapi) TG3_TX_WAKEUP_THRESH(tnapi))) + (tg3_tx_avail(tnapi) tnapi-wakeup_thresh)) netif_tx_wake_queue(txq); __netif_tx_unlock(txq); } @@ -7830,6 +7834,8 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, } static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); +static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *, + u32); /* Returns true if the queue has been stopped. Note that it may have been * restarted since. @@ -7844,6 +7850,7 @@ static inline bool