On Tue, Feb 11, 2020 at 06:44:12AM +1000, Jonathan Matthew wrote:
> On Mon, Feb 10, 2020 at 02:04:06PM +0100, Mark Kettenis wrote:
> > Jonathan Matthew schreef op 2020-02-10 13:19:
> > > I'm trying to use aggr on top of nep(4), which has turned up a few bugs.
> > > Firstly, the nic appears to report rx errors in the second interrupt
> > > state vector, which the driver doesn't expect, so I get this:
> > >
> > > nep3: nep_intr 0 8 0
> > >
> > > and then, since the interrupt wasn't rearmed, the interface stops.
> > >
> > > Then, the driver doesn't clear rx error interrupt sources, so rearming
> > > results in an interrupt storm. The illumos nxge driver masks off
> > > read-only
> > > bits in the rx status register and writes it back, clearing all active
> > > interrupt sources, so I've done that here too.
> > >
> > > ok?
> > >
> > >
> > > Index: if_nep.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/dev/pci/if_nep.c,v
> > > retrieving revision 1.31
> > > diff -u -p -r1.31 if_nep.c
> > > --- if_nep.c 9 Nov 2018 14:14:31 -0000 1.31
> > > +++ if_nep.c 10 Feb 2020 11:57:44 -0000
> > > @@ -333,11 +333,24 @@
> > > #define RX_DMA_ENT_MSK(chan) (DMC + 0x00068 + (chan) * 0x00200)
> > > #define RX_DMA_ENT_MSK_RBR_EMPTY (1ULL << 3)
> > > #define RX_DMA_CTL_STAT(chan) (DMC + 0x00070 + (chan) * 0x00200)
> > > +#define RX_DMA_CTL_STAT_DC_FIFO_ERR (1ULL << 48)
> > > #define RX_DMA_CTL_STAT_MEX (1ULL << 47)
> > > #define RX_DMA_CTL_STAT_RCRTHRES (1ULL << 46)
> > > #define RX_DMA_CTL_STAT_RCRTO (1ULL << 45)
> > > +#define RX_DMA_CTL_STAT_PORT_DROP_PKT (1ULL << 42)
> > > +#define RX_DMA_CTL_STAT_WRED_DROP (1ULL << 41)
> > > +#define RX_DMA_CTL_STAT_RBR_PRE_EMTY (1ULL << 40)
> > > +#define RX_DMA_CTL_STAT_RCR_SHDW_FULL (1ULL << 39)
> > > #define RX_DMA_CTL_STAT_RBR_EMPTY (1ULL << 35)
> > > #define RX_DMA_CTL_STAT_PTRREAD_SHIFT 16
> > > +#define RX_DMA_CTL_STAT_WR1C_BITS (RX_DMA_CTL_STAT_RBR_EMPTY | \
> > > + RX_DMA_CTL_STAT_RCR_SHDW_FULL | \
> > > + RX_DMA_CTL_STAT_RBR_PRE_EMTY | \
> > > + RX_DMA_CTL_STAT_WRED_DROP | \
> > > + RX_DMA_CTL_STAT_PORT_DROP_PKT | \
> > > + RX_DMA_CTL_STAT_RCRTO | \
> > > + RX_DMA_CTL_STAT_RCRTHRES | \
> > > + RX_DMA_CTL_STAT_DC_FIFO_ERR)
> > > #define RX_DMA_CTL_STAT_DBG(chan) (DMC + 0x00098 + (chan) * 0x00200)
> > >
> > > #define TX_LOG_PAGE_VLD(chan) (FZC_DMC + 0x40000 + (chan) * 0x00200)
> > > @@ -959,7 +972,7 @@ nep_intr(void *arg)
> > > rearm = 1;
> > > }
> > >
> > > - if (sv0 & (1ULL << LDN_RXDMA(sc->sc_port))) {
> > > + if ((sv0 | sv1) & (1ULL << LDN_RXDMA(sc->sc_port))) {
> >
> > This doesn't make sense to me.
> >
> > I suppose you see a bit getting set in sv1. Which bit is that?
>
> The same bit that gets set in sv0 under normal rx conditions.
> For nep3, sv1 is 8, on nep1 it's 2. You can see the illumos driver
> doing the same thing here:
>
> https://github.com/illumos/illumos-gate/blob/master/usr/src/uts/common/io/nxge/nxge_hw.c#L274
Or, we could mask off the non-fatal rx error interrupts,
which also fixes the problem I'm seeing.
Does this look better to you?
Index: if_nep.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_nep.c,v
retrieving revision 1.31
diff -u -p -r1.31 if_nep.c
--- if_nep.c 9 Nov 2018 14:14:31 -0000 1.31
+++ if_nep.c 12 Feb 2020 22:10:56 -0000
@@ -332,12 +332,29 @@
#define RX_DMA_ENT_MSK(chan) (DMC + 0x00068 + (chan) * 0x00200)
#define RX_DMA_ENT_MSK_RBR_EMPTY (1ULL << 3)
+#define RX_DMA_ENT_MSK_WRED_DROP (1ULL << 9)
+#define RX_DMA_ENT_MSK_PTDROP_PKT (1ULL << 10)
+#define RX_DMA_ENT_MSK_RBR_PRE_PAR (1ULL << 11)
+#define RX_DMA_ENT_MSK_RCR_SHA_PAR (1ULL << 12)
#define RX_DMA_CTL_STAT(chan) (DMC + 0x00070 + (chan) * 0x00200)
+#define RX_DMA_CTL_STAT_DC_FIFO_ERR (1ULL << 48)
#define RX_DMA_CTL_STAT_MEX (1ULL << 47)
#define RX_DMA_CTL_STAT_RCRTHRES (1ULL << 46)
#define RX_DMA_CTL_STAT_RCRTO (1ULL << 45)
+#define RX_DMA_CTL_STAT_PORT_DROP_PKT (1ULL << 42)
+#define RX_DMA_CTL_STAT_WRED_DROP (1ULL << 41)
+#define RX_DMA_CTL_STAT_RBR_PRE_EMTY (1ULL << 40)
+#define RX_DMA_CTL_STAT_RCR_SHDW_FULL (1ULL << 39)
#define RX_DMA_CTL_STAT_RBR_EMPTY (1ULL << 35)
#define RX_DMA_CTL_STAT_PTRREAD_SHIFT 16
+#define RX_DMA_CTL_STAT_WR1C_BITS (RX_DMA_CTL_STAT_RBR_EMPTY | \
+ RX_DMA_CTL_STAT_RCR_SHDW_FULL | \
+ RX_DMA_CTL_STAT_RBR_PRE_EMTY | \
+ RX_DMA_CTL_STAT_WRED_DROP | \
+ RX_DMA_CTL_STAT_PORT_DROP_PKT | \
+ RX_DMA_CTL_STAT_RCRTO | \
+ RX_DMA_CTL_STAT_RCRTHRES | \
+ RX_DMA_CTL_STAT_DC_FIFO_ERR)
#define RX_DMA_CTL_STAT_DBG(chan) (DMC + 0x00098 + (chan) * 0x00200)
#define TX_LOG_PAGE_VLD(chan) (FZC_DMC + 0x40000 + (chan) * 0x00200)
@@ -990,8 +1007,8 @@ nep_rx_proc(struct nep_softc *sc)
int idx, len, i;
val = nep_read(sc, RX_DMA_CTL_STAT(sc->sc_port));
- nep_write(sc, RX_DMA_CTL_STAT(sc->sc_port),
- RX_DMA_CTL_STAT_RCRTHRES | RX_DMA_CTL_STAT_RCRTO);
+ val &= RX_DMA_CTL_STAT_WR1C_BITS;
+ nep_write(sc, RX_DMA_CTL_STAT(sc->sc_port), val);
bus_dmamap_sync(sc->sc_dmat, NEP_DMA_MAP(sc->sc_rcring), 0,
NEP_DMA_LEN(sc->sc_rcring), BUS_DMASYNC_POSTREAD);
@@ -1301,7 +1318,9 @@ nep_init_rx_channel(struct nep_softc *sc
(sc->sc_port << RX_LOG_PAGE_VLD_FUNC_SHIFT) |
RX_LOG_PAGE_VLD_PAGE0 | RX_LOG_PAGE_VLD_PAGE1);
- nep_write(sc, RX_DMA_ENT_MSK(chan), RX_DMA_ENT_MSK_RBR_EMPTY);
+ nep_write(sc, RX_DMA_ENT_MSK(chan), RX_DMA_ENT_MSK_RBR_EMPTY |
+ RX_DMA_ENT_MSK_WRED_DROP | RX_DMA_ENT_MSK_PTDROP_PKT |
+ RX_DMA_ENT_MSK_RBR_PRE_PAR | RX_DMA_ENT_MSK_RCR_SHA_PAR);
nep_write(sc, RX_DMA_CTL_STAT(chan), RX_DMA_CTL_STAT_MEX);
val = NEP_DMA_DVA(sc->sc_rxmbox) >> 32;