On Tue, Feb 11, 2020 at 06:44:12AM +1000, Jonathan Matthew wrote: > On Mon, Feb 10, 2020 at 02:04:06PM +0100, Mark Kettenis wrote: > > Jonathan Matthew schreef op 2020-02-10 13:19: > > > I'm trying to use aggr on top of nep(4), which has turned up a few bugs. > > > Firstly, the nic appears to report rx errors in the second interrupt > > > state vector, which the driver doesn't expect, so I get this: > > > > > > nep3: nep_intr 0 8 0 > > > > > > and then, since the interrupt wasn't rearmed, the interface stops. > > > > > > Then, the driver doesn't clear rx error interrupt sources, so rearming > > > results in an interrupt storm. The illumos nxge driver masks off > > > read-only > > > bits in the rx status register and writes it back, clearing all active > > > interrupt sources, so I've done that here too. > > > > > > ok? > > > > > > > > > Index: if_nep.c > > > =================================================================== > > > RCS file: /cvs/src/sys/dev/pci/if_nep.c,v > > > retrieving revision 1.31 > > > diff -u -p -r1.31 if_nep.c > > > --- if_nep.c 9 Nov 2018 14:14:31 -0000 1.31 > > > +++ if_nep.c 10 Feb 2020 11:57:44 -0000 > > > @@ -333,11 +333,24 @@ > > > #define RX_DMA_ENT_MSK(chan) (DMC + 0x00068 + (chan) * 0x00200) > > > #define RX_DMA_ENT_MSK_RBR_EMPTY (1ULL << 3) > > > #define RX_DMA_CTL_STAT(chan) (DMC + 0x00070 + (chan) * 0x00200) > > > +#define RX_DMA_CTL_STAT_DC_FIFO_ERR (1ULL << 48) > > > #define RX_DMA_CTL_STAT_MEX (1ULL << 47) > > > #define RX_DMA_CTL_STAT_RCRTHRES (1ULL << 46) > > > #define RX_DMA_CTL_STAT_RCRTO (1ULL << 45) > > > +#define RX_DMA_CTL_STAT_PORT_DROP_PKT (1ULL << 42) > > > +#define RX_DMA_CTL_STAT_WRED_DROP (1ULL << 41) > > > +#define RX_DMA_CTL_STAT_RBR_PRE_EMTY (1ULL << 40) > > > +#define RX_DMA_CTL_STAT_RCR_SHDW_FULL (1ULL << 39) > > > #define RX_DMA_CTL_STAT_RBR_EMPTY (1ULL << 35) > > > #define RX_DMA_CTL_STAT_PTRREAD_SHIFT 16 > > > +#define RX_DMA_CTL_STAT_WR1C_BITS (RX_DMA_CTL_STAT_RBR_EMPTY | \ > > > + RX_DMA_CTL_STAT_RCR_SHDW_FULL | \ > > > + RX_DMA_CTL_STAT_RBR_PRE_EMTY | \ > > > + RX_DMA_CTL_STAT_WRED_DROP | \ > > > + RX_DMA_CTL_STAT_PORT_DROP_PKT | \ > > > + RX_DMA_CTL_STAT_RCRTO | \ > > > + RX_DMA_CTL_STAT_RCRTHRES | \ > > > + RX_DMA_CTL_STAT_DC_FIFO_ERR) > > > #define RX_DMA_CTL_STAT_DBG(chan) (DMC + 0x00098 + (chan) * 0x00200) > > > > > > #define TX_LOG_PAGE_VLD(chan) (FZC_DMC + 0x40000 + (chan) * 0x00200) > > > @@ -959,7 +972,7 @@ nep_intr(void *arg) > > > rearm = 1; > > > } > > > > > > - if (sv0 & (1ULL << LDN_RXDMA(sc->sc_port))) { > > > + if ((sv0 | sv1) & (1ULL << LDN_RXDMA(sc->sc_port))) { > > > > This doesn't make sense to me. > > > > I suppose you see a bit getting set in sv1. Which bit is that? > > The same bit that gets set in sv0 under normal rx conditions. > For nep3, sv1 is 8, on nep1 it's 2. You can see the illumos driver > doing the same thing here: > > https://github.com/illumos/illumos-gate/blob/master/usr/src/uts/common/io/nxge/nxge_hw.c#L274
Or, we could mask off the non-fatal rx error interrupts, which also fixes the problem I'm seeing. Does this look better to you? Index: if_nep.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_nep.c,v retrieving revision 1.31 diff -u -p -r1.31 if_nep.c --- if_nep.c 9 Nov 2018 14:14:31 -0000 1.31 +++ if_nep.c 12 Feb 2020 22:10:56 -0000 @@ -332,12 +332,29 @@ #define RX_DMA_ENT_MSK(chan) (DMC + 0x00068 + (chan) * 0x00200) #define RX_DMA_ENT_MSK_RBR_EMPTY (1ULL << 3) +#define RX_DMA_ENT_MSK_WRED_DROP (1ULL << 9) +#define RX_DMA_ENT_MSK_PTDROP_PKT (1ULL << 10) +#define RX_DMA_ENT_MSK_RBR_PRE_PAR (1ULL << 11) +#define RX_DMA_ENT_MSK_RCR_SHA_PAR (1ULL << 12) #define RX_DMA_CTL_STAT(chan) (DMC + 0x00070 + (chan) * 0x00200) +#define RX_DMA_CTL_STAT_DC_FIFO_ERR (1ULL << 48) #define RX_DMA_CTL_STAT_MEX (1ULL << 47) #define RX_DMA_CTL_STAT_RCRTHRES (1ULL << 46) #define RX_DMA_CTL_STAT_RCRTO (1ULL << 45) +#define RX_DMA_CTL_STAT_PORT_DROP_PKT (1ULL << 42) +#define RX_DMA_CTL_STAT_WRED_DROP (1ULL << 41) +#define RX_DMA_CTL_STAT_RBR_PRE_EMTY (1ULL << 40) +#define RX_DMA_CTL_STAT_RCR_SHDW_FULL (1ULL << 39) #define RX_DMA_CTL_STAT_RBR_EMPTY (1ULL << 35) #define RX_DMA_CTL_STAT_PTRREAD_SHIFT 16 +#define RX_DMA_CTL_STAT_WR1C_BITS (RX_DMA_CTL_STAT_RBR_EMPTY | \ + RX_DMA_CTL_STAT_RCR_SHDW_FULL | \ + RX_DMA_CTL_STAT_RBR_PRE_EMTY | \ + RX_DMA_CTL_STAT_WRED_DROP | \ + RX_DMA_CTL_STAT_PORT_DROP_PKT | \ + RX_DMA_CTL_STAT_RCRTO | \ + RX_DMA_CTL_STAT_RCRTHRES | \ + RX_DMA_CTL_STAT_DC_FIFO_ERR) #define RX_DMA_CTL_STAT_DBG(chan) (DMC + 0x00098 + (chan) * 0x00200) #define TX_LOG_PAGE_VLD(chan) (FZC_DMC + 0x40000 + (chan) * 0x00200) @@ -990,8 +1007,8 @@ nep_rx_proc(struct nep_softc *sc) int idx, len, i; val = nep_read(sc, RX_DMA_CTL_STAT(sc->sc_port)); - nep_write(sc, RX_DMA_CTL_STAT(sc->sc_port), - RX_DMA_CTL_STAT_RCRTHRES | RX_DMA_CTL_STAT_RCRTO); + val &= RX_DMA_CTL_STAT_WR1C_BITS; + nep_write(sc, RX_DMA_CTL_STAT(sc->sc_port), val); bus_dmamap_sync(sc->sc_dmat, NEP_DMA_MAP(sc->sc_rcring), 0, NEP_DMA_LEN(sc->sc_rcring), BUS_DMASYNC_POSTREAD); @@ -1301,7 +1318,9 @@ nep_init_rx_channel(struct nep_softc *sc (sc->sc_port << RX_LOG_PAGE_VLD_FUNC_SHIFT) | RX_LOG_PAGE_VLD_PAGE0 | RX_LOG_PAGE_VLD_PAGE1); - nep_write(sc, RX_DMA_ENT_MSK(chan), RX_DMA_ENT_MSK_RBR_EMPTY); + nep_write(sc, RX_DMA_ENT_MSK(chan), RX_DMA_ENT_MSK_RBR_EMPTY | + RX_DMA_ENT_MSK_WRED_DROP | RX_DMA_ENT_MSK_PTDROP_PKT | + RX_DMA_ENT_MSK_RBR_PRE_PAR | RX_DMA_ENT_MSK_RCR_SHA_PAR); nep_write(sc, RX_DMA_CTL_STAT(chan), RX_DMA_CTL_STAT_MEX); val = NEP_DMA_DVA(sc->sc_rxmbox) >> 32;