On Mon, Jan 18, 2016 at 20:25 +0100, Mike Belopuhov wrote:
> On Sun, Jan 17, 2016 at 20:46 -0800, Jonathon Sisson wrote:
> > Hi,
> >
> > First off, thank you for OpenBSD in general, and thank you specifically
> > for the PV drivers on OpenBSD =) The day of migrating workloads to AWS
> > gets ever closer for me, and I appreciate everything the OpenBSD dev
> > team does.
> >
> > I've found what appears to be a repeatable crash that results in this:
> >
> > panic: xnf0: save vs spell: 214
> >
> > Stopped at Debugger+0x9: leave
> > TID PID UID PRFLAGS PFLAGS CPU COMMAND
> > 14532 9243 0 0x3 0x4000000 1 python2.7
> > * 7215 9243 0 0x3 0x4000000 0 python2.7
> >
> > Debugger() at Debugger+0x9
> > panic() at panic+0xfe
> > xnf_encap() at xnf_encap+0x1a9
> > xnf_start() at xnf_start+0x7f
> > ifq_serialize() at ifq_serialize+0xd9
> > if_enqueue() at if_enqueue+0x71
> > ether_output() at ether_output+0x166
> > ip_output() at ip_output+0x6d3
> > tcp_output() at tcp_output+0x87e
> > tcp_usrreq() at tcp_usrreq+0x3fc
> > sosend() at sosend+0x3d8
> > dofilewritev() at dofilewritev+0x205
> > sys_write() at sys_write+0x89
> > syscall() at syscall+0x368
> > --- syscall (number 4) ---
> > end of kernel
> > end trace frame: 0x9a8c96a2800, count: 1
> > 0x9a91790279a:
> > --db_more--
> >
> > I'm unable to run further commands at the console, as AWS does not
> > provide console access.
> >
> > I'm using this test machine to build CURRENT and upload it to an s3
> > bucket that I've been using for STABLE builds. The python code is
> > the awscli installed via py-pip running on Python 2.7.11. The precise
> > command is:
> >
> > aws s3 sync /usr/rel/ s3://$AWS_BUCKET_NAME/path/
> >
> > If there is any further testing I can provide, I am more than happy
> > to provide any details you need.
> >
> > -Jonathon
> >
>
> Can you please try the diff below on top of a -current kernel
> (I've pushed some additional Xen fixes just now).
>
> You should be able to copy the kernel into the AWS instance.
>
> My math wasn't correct here, so txeof would unload a chain before
> we had processed all of its descriptors/fragments.
>
A slight amendment to the diff (I forgot one chunk):
diff --git sys/dev/pv/if_xnf.c sys/dev/pv/if_xnf.c
index 02761d8..7c0e1fb 100644
--- sys/dev/pv/if_xnf.c
+++ sys/dev/pv/if_xnf.c
@@ -489,11 +489,11 @@ xnf_encap(struct xnf_softc *sc, struct mbuf *m, uint32_t *prod)
struct xnf_tx_ring *txr = sc->sc_tx_ring;
union xnf_tx_desc *txd;
bus_dmamap_t dmap;
int error, i, n = 0;
- if (((txr->txr_cons - *prod - 1) & (XNF_TX_DESC - 1)) < XNF_TX_FRAG) {
+ if ((XNF_TX_DESC - (*prod - txr->txr_cons)) < XNF_TX_FRAG) {
error = ENOENT;
goto errout;
}
i = *prod & (XNF_TX_DESC - 1);
@@ -513,21 +513,22 @@ xnf_encap(struct xnf_softc *sc, struct mbuf *m, uint32_t *prod)
i = *prod & (XNF_TX_DESC - 1);
if (sc->sc_tx_buf[i])
panic("%s: save vs spell: %d\n", ifp->if_xname, i);
txd = &txr->txr_desc[i];
if (n == 0) {
- sc->sc_tx_buf[i] = m;
if (0 && m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
txd->txd_req.txq_flags = XNF_TXF_CSUM |
XNF_TXF_VALID;
txd->txd_req.txq_size = m->m_pkthdr.len;
} else
txd->txd_req.txq_size = dmap->dm_segs[n].ds_len;
if (n != dmap->dm_nsegs - 1)
txd->txd_req.txq_flags |= XNF_TXF_CHUNK;
txd->txd_req.txq_ref = dmap->dm_segs[n].ds_addr;
txd->txd_req.txq_offset = dmap->dm_segs[n].ds_offset;
+ sc->sc_tx_buf[i] = m;
+ m = m->m_next;
}
ifp->if_opackets++;
return (0);
@@ -583,11 +584,11 @@ xnf_txeof(struct xnf_softc *sc)
if (sc->sc_tx_buf[i]) {
dmap = sc->sc_tx_dmap[i];
bus_dmamap_unload(sc->sc_dmat, dmap);
m = sc->sc_tx_buf[i];
sc->sc_tx_buf[i] = NULL;
- m_freem(m);
+ m_free(m);
}
pkts++;
}
if (pkts > 0) {
@@ -934,11 +935,11 @@ xnf_tx_ring_destroy(struct xnf_softc *sc)
if (sc->sc_tx_dmap[i] == NULL)
continue;
bus_dmamap_unload(sc->sc_dmat, sc->sc_tx_dmap[i]);
if (sc->sc_tx_buf[i] == NULL)
continue;
- m_freem(sc->sc_tx_buf[i]);
+ m_free(sc->sc_tx_buf[i]);
sc->sc_tx_buf[i] = NULL;
}
for (i = 0; i < XNF_TX_DESC; i++) {
if (sc->sc_tx_dmap[i] == NULL)
continue;