On 2018/01/26 13:01, Alexander Duyck wrote:
> On Fri, Jan 26, 2018 at 1:12 AM, Benjamin Poirier <bpoir...@suse.com> wrote:
> > This reverts commit 16ecba59bc333d6282ee057fb02339f77a880beb.
> >
> > It was reported that emulated e1000e devices in vmware esxi 6.5 Build
> > 7526125 do not link up after commit 4aea7a5c5e94 ("e1000e: Avoid receiver
> > overrun interrupt bursts"). Some tracing shows that after
> > e1000e_trigger_lsc() is called, ICR reads out as 0x0 in e1000_msix_other()
> > on emulated e1000e devices. In comparison, on real e1000e 82574 hardware,
> > icr=0x80000004 (_INT_ASSERTED | _LSC) in the same situation.
> >
> > Some experimentation showed that this flaw in vmware e1000e emulation can
> > be worked around by not setting Other in EIAC. This is how it was before
> > commit 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt").
> >
> > Since the ICR read in the Other interrupt handler has already been
> > restored, this patch effectively reverts the remainder of commit
> > 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt").
> >
> > Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts")
> > Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
> > ---
> >  drivers/net/ethernet/intel/e1000e/netdev.c | 10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
> > b/drivers/net/ethernet/intel/e1000e/netdev.c
> > index ed103b9a8d3a..fffc1f0e3895 100644
> > --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> > +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> > @@ -1916,6 +1916,13 @@ static irqreturn_t e1000_msix_other(int 
> > __always_unused irq, void *data)
> >         struct e1000_hw *hw = &adapter->hw;
> >         u32 icr = er32(ICR);
> >
> > +       /* Certain events (such as RXO) which trigger Other do not set
> > +        * INT_ASSERTED. In that case, read to clear of icr does not take
> > +        * place.
> > +        */
> > +       if (!(icr & E1000_ICR_INT_ASSERTED))
> > +               ew32(ICR, E1000_ICR_OTHER);
> > +
> 
> This piece doesn't make sense to me. Why are we clearing OTHER if
> ICR_INT_ASSERTED is not set?

Datasheet ยง10.2.4.1 ("Interrupt Cause Read Register") says that ICR read
to clear only occurs if INT_ASSERTED is set. This corresponds to what I
observed.

However, while working on these issues, I noticed that when there is an rxo
event, INT_ASSERTED is not always set even though the interrupt is raised. I
think this is a hardware flaw.

For example, if doing
ew32(ICS, E1000_ICS_LSC | E1000_ICS_OTHER);
we enter e1000_msix_other() and two consecutive reads of ICR result in
0x81000004
0x00000000

If doing
ew32(ICS, E1000_ICS_RXO | E1000_ICS_OTHER);
we enter e1000_msix_other() and two consecutive reads of ICR result in
0x01000041
0x01000041

Consequently, we must clear OTHER manually from ICR, otherwise the
interrupt is immediately re-raised after exiting the handler.

These observations are the same whether the interrupt is triggered via a
write to ICS or in hardware.

Furthermore, I tested that this behavior is the same for other Other
events (MDAC, SRPD, ACK, MNG). Those were tested via a write to ICS
only, not in hardware.

This is a version of the test patch that I used to trigger lsc and rxo in
software and hardware. It applies over this patch series.

diff --git a/drivers/net/ethernet/intel/e1000e/defines.h 
b/drivers/net/ethernet/intel/e1000e/defines.h
index 0641c0098738..f54e7ac9c934 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -398,6 +398,7 @@
 #define E1000_ICR_LSC           0x00000004 /* Link Status Change */
 #define E1000_ICR_RXSEQ         0x00000008 /* Rx sequence error */
 #define E1000_ICR_RXDMT0        0x00000010 /* Rx desc min. threshold (0) */
+#define E1000_ICR_RXO           0x00000040 /* rx overrun */
 #define E1000_ICR_RXT0          0x00000080 /* Rx timer intr (ring 0) */
 #define E1000_ICR_ECCER         0x00400000 /* Uncorrectable ECC Error */
 /* If this bit asserted, the driver should claim the interrupt */
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c 
b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 003cbd605799..4933c1beac74 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -1802,98 +1802,20 @@ static void e1000_diag_test(struct net_device *netdev,
                            struct ethtool_test *eth_test, u64 *data)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
-       u16 autoneg_advertised;
-       u8 forced_speed_duplex;
-       u8 autoneg;
-       bool if_running = netif_running(netdev);
+       struct e1000_hw *hw = &adapter->hw;
 
        pm_runtime_get_sync(netdev->dev.parent);
 
        set_bit(__E1000_TESTING, &adapter->state);
 
-       if (!if_running) {
-               /* Get control of and reset hardware */
-               if (adapter->flags & FLAG_HAS_AMT)
-                       e1000e_get_hw_control(adapter);
-
-               e1000e_power_up_phy(adapter);
-
-               adapter->hw.phy.autoneg_wait_to_complete = 1;
-               e1000e_reset(adapter);
-               adapter->hw.phy.autoneg_wait_to_complete = 0;
-       }
-
        if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
-               /* Offline tests */
-
-               /* save speed, duplex, autoneg settings */
-               autoneg_advertised = adapter->hw.phy.autoneg_advertised;
-               forced_speed_duplex = adapter->hw.mac.forced_speed_duplex;
-               autoneg = adapter->hw.mac.autoneg;
-
-               e_info("offline testing starting\n");
-
-               if (if_running)
-                       /* indicate we're in test mode */
-                       e1000e_close(netdev);
-
-               if (e1000_reg_test(adapter, &data[0]))
-                       eth_test->flags |= ETH_TEST_FL_FAILED;
-
-               e1000e_reset(adapter);
-               if (e1000_eeprom_test(adapter, &data[1]))
-                       eth_test->flags |= ETH_TEST_FL_FAILED;
-
-               e1000e_reset(adapter);
-               if (e1000_intr_test(adapter, &data[2]))
-                       eth_test->flags |= ETH_TEST_FL_FAILED;
-
-               e1000e_reset(adapter);
-               if (e1000_loopback_test(adapter, &data[3]))
-                       eth_test->flags |= ETH_TEST_FL_FAILED;
-
-               /* force this routine to wait until autoneg complete/timeout */
-               adapter->hw.phy.autoneg_wait_to_complete = 1;
-               e1000e_reset(adapter);
-               adapter->hw.phy.autoneg_wait_to_complete = 0;
-
-               if (e1000_link_test(adapter, &data[4]))
-                       eth_test->flags |= ETH_TEST_FL_FAILED;
-
-               /* restore speed, duplex, autoneg settings */
-               adapter->hw.phy.autoneg_advertised = autoneg_advertised;
-               adapter->hw.mac.forced_speed_duplex = forced_speed_duplex;
-               adapter->hw.mac.autoneg = autoneg;
-               e1000e_reset(adapter);
-
-               clear_bit(__E1000_TESTING, &adapter->state);
-               if (if_running)
-                       e1000e_open(netdev);
+               // LSC, RXO, MDAC, SRPD, ACK, MNG
+               ew32(ICS, E1000_ICR_RXO | E1000_ICR_OTHER);
        } else {
-               /* Online tests */
-
-               e_info("online testing starting\n");
-
-               /* register, eeprom, intr and loopback tests not run online */
-               data[0] = 0;
-               data[1] = 0;
-               data[2] = 0;
-               data[3] = 0;
-
-               if (e1000_link_test(adapter, &data[4]))
-                       eth_test->flags |= ETH_TEST_FL_FAILED;
-
-               clear_bit(__E1000_TESTING, &adapter->state);
-       }
-
-       if (!if_running) {
-               e1000e_reset(adapter);
-
-               if (adapter->flags & FLAG_HAS_AMT)
-                       e1000e_release_hw_control(adapter);
+               ew32(ICS, E1000_ICR_LSC | E1000_ICR_OTHER);
        }
 
-       msleep_interruptible(4 * 1000);
+       clear_bit(__E1000_TESTING, &adapter->state);
 
        pm_runtime_put_sync(netdev->dev.parent);
 }
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index fffc1f0e3895..5b3a0feaf052 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -46,6 +46,10 @@
 
 #include "e1000.h"
 
+DEFINE_RATELIMIT_STATE(rx_ratelimit_state, 2 * HZ, 1);
+DEFINE_RATELIMIT_STATE(other_ratelimit_state, 2 * HZ, 1);
+DEFINE_RATELIMIT_STATE(other_ratelimit_state2, 2 * HZ, 1);
+
 #define DRV_EXTRAVERSION "-k"
 
 #define DRV_VERSION "3.2.6" DRV_EXTRAVERSION
@@ -936,6 +940,9 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, 
int *work_done,
        int cleaned_count = 0;
        bool cleaned = false;
        unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+       static unsigned int count;
+
+       mdelay(10);
 
        i = rx_ring->next_to_clean;
        rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
@@ -1067,6 +1074,16 @@ static bool e1000_clean_rx_irq(struct e1000_ring 
*rx_ring, int *work_done,
 
        adapter->total_rx_bytes += total_rx_bytes;
        adapter->total_rx_packets += total_rx_packets;
+
+       count++;
+       if (__ratelimit(&rx_ratelimit_state)) {
+               static unsigned int max;
+               max = max(max, total_rx_packets);
+               trace_printk("rx %u now, max %u, %u rounds\n",
+                            total_rx_packets, max, count);
+               count = 0;
+       }
+
        return cleaned;
 }
 
@@ -1914,14 +1931,30 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
        struct net_device *netdev = data;
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
-       u32 icr = er32(ICR);
+       static unsigned int count;
+       u32 icr2, icr = er32(ICR);
 
        /* Certain events (such as RXO) which trigger Other do not set
         * INT_ASSERTED. In that case, read to clear of icr does not take
         * place.
         */
+       /*
        if (!(icr & E1000_ICR_INT_ASSERTED))
                ew32(ICR, E1000_ICR_OTHER);
+       */
+
+       icr2 = er32(ICR);
+
+       count++;
+       if (__ratelimit(&other_ratelimit_state)) {
+               trace_printk("icr 0x%08x icr2 0x%08x count %u\n", icr, icr2,
+                            count);
+               count = 0;
+       }
+       if (icr & E1000_ICR_RXO && icr & E1000_ICR_INT_ASSERTED &&
+           __ratelimit(&other_ratelimit_state2)) {
+               trace_printk("special icr 0x%08x icr2 0x%08x\n", icr, icr2);
+       }
 
        if (icr & adapter->eiac_mask)
                ew32(ICS, (icr & adapter->eiac_mask));

Reply via email to